def obtener_tipo_palabra(palabra):
    """
    Checks that the word exists. If it does, returns the word type
    (adjective, noun or verb); otherwise returns an empty string.

    Args:
        palabra (str): Word to validate.

    Returns:
        str: Word type, 'A' for adjective, 'S' for noun and 'V' for verb.
            If the word does not exist, returns an empty string.
    """
    try:
        parser = WiktionaryParser()
        parser.set_default_language('spanish')
        word = parser.fetch(palabra)
        tipo_palabra = word[0]['definitions'][0]['partOfSpeech']
        if tipo_palabra == 'adjective':
            return ADJETIVOS
        elif tipo_palabra == 'noun':
            return SUSTANTIVOS
        elif tipo_palabra == 'verb':
            return VERBOS
        else:
            return ''
    except IndexError:
        return ''
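# Minimal usage sketch for obtener_tipo_palabra above. It assumes the module-level
# constants ADJETIVOS, SUSTANTIVOS and VERBOS hold 'A', 'S' and 'V' as the docstring
# describes; those bindings are illustrative, not part of the original snippet.
from wiktionaryparser import WiktionaryParser

ADJETIVOS, SUSTANTIVOS, VERBOS = 'A', 'S', 'V'

if __name__ == '__main__':
    for palabra in ('rojo', 'casa', 'correr'):
        print(palabra, '->', obtener_tipo_palabra(palabra) or 'not found')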
def is_noun(word, country_code):
    """
    Determines whether the given word is a noun.

    Args:
        word: String
        country_code: String

    Returns:
        Boolean
    """
    if country_code == 'EN':
        for tmp in nltk.corpus.wordnet.synsets(word):
            name_parts = tmp.name().split('.')
            if name_parts[0] == word and name_parts[1] == 'n':
                return True
        return False
    if country_code == 'PL':
        parser = WiktionaryParser()
        try:
            part_of_speech = parser.fetch(
                word, 'polish')[0]['definitions'][0]['partOfSpeech']
            return part_of_speech == "noun"
        except Exception:
            return False
class WikiTagger:
    def __init__(self):
        self.wp = WiktionaryParser()
        self.wp.set_default_language('portuguese')

    def fetch(self, tokens):
        for x in tokens:
            if x.pos == 'TEMP':  # or isinstance(x.pos, list):
                try:
                    # parts = x.pos if isinstance(x.pos, list) else []  # persistence
                    parts = []
                    p = self.wp.fetch(x.symbol)
                    for k in p:
                        for y in k['definitions']:
                            gender = ''
                            if 'm' in y['text'][0].split():
                                gender = 'MASC'
                            elif 'f' in y['text'][0].split():
                                gender = 'FEMI'
                            else:
                                gender = 'DESC'
                            coiso = PartOfSpeech(y['partOfSpeech'], gender, "TEMP")
                            parts.append(coiso)
                    if parts:
                        x.pos = parts
                    else:
                        x.pos = [PartOfSpeech('proper noun', 'DESC', 'TEMP')]
                    # x.pos = wp.fetch(x.symbol)[0]['definitions'][0]['partOfSpeech']
                except Exception as e:
                    print(e)
                    x.pos = "ERROR"
        return tokens
def get_info_from_wiktionary(one_word):
    parser = WiktionaryParser()
    word = parser.fetch(one_word, 'german')
    definitions = word[0]['definitions']
    print(definitions)
class WiktionarySkill(MycroftSkill):
    def __init__(self):
        MycroftSkill.__init__(self)

    def initialize(self):
        self.register_entity_file("word.entity")
        self.parser = WiktionaryParser()

    @intent_file_handler('fallback.wiktionary.definition.intent')
    def handle_wiktionary_definition(self, message):
        # Get the word to define from the utterance
        word = message.data.get('word')
        # Look up the word using Wiktionary
        get_word_info = self.parser.fetch(word)
        # Speak the definition for the requested word back to the user
        try:
            # Get the first definition from the Wiktionary response
            response = get_word_info[0]['definitions'][0]['text'][1]
            # Log the definition
            LOG.info(response)
            self.speak_dialog('fallback.wiktionary', {
                'word': word,
                'definition': response
            })
        except Exception:
            self.speak_dialog('error.wiktionary')
def getDef(word):
    # PyDictionary
    dictionary = PyDictionary()
    definition = dictionary.meaning(word)
    if isinstance(definition, dict) and 'Noun' in definition:
        defs = definition['Noun']
        if isinstance(defs, list) and len(defs) > 0:
            return defs

    # wordnik dictionary
    wordApi = WordApi.WordApi(client)
    definitions = wordApi.getDefinitions(word, partOfSpeech='noun', limit=3)
    if definitions is not None and len(definitions) > 0:
        return [definition.text.lower() for definition in definitions]

    # vocabulary module
    meaningsList = vocabulary.meaning(word)
    if meaningsList is not False:
        defs = json.loads(meaningsList)
        if len(defs) > 0:
            definitions = []
            for definition in defs:
                if definition['text']:
                    d = re.sub('<[^<]+?>', '', definition['text'])
                    definitions.append(d.lower())
            if len(definitions) > 0:
                return definitions

    # owlbot api
    url = 'https://owlbot.info/api/v2/dictionary/' + word
    r = requests.get(url)
    if r is not None:
        try:
            result = r.json()
            if len(result) > 0:
                definitions = []
                for item in result:
                    if item['type'] == 'noun' and item['definition']:
                        definitions.append(item['definition'].lower())
                if len(definitions) > 0:
                    return definitions
        except Exception:
            pass

    # wiktionary
    try:
        parser = WiktionaryParser()
        result = parser.fetch(word)
        if result is not None:
            definition = result[0]['definitions']
            if definition and len(definition) > 0:
                definition = definition[0]
                if 'partOfSpeech' in definition and definition['partOfSpeech'] == 'noun':
                    defs = definition['text'].lower().split('\n')
                    if len(defs) > 1:
                        return defs[0:2]
                    elif len(defs) == 1:
                        return defs
    except Exception:
        return ' '
    return ' '
async def run(self, message: discord.Message, trigger: str, client: discord.Client):
    word = message.content[len(trigger):]  # string
    parser = WiktionaryParser()
    # language defaults to English, but pass it explicitly for clarity
    fetched = parser.fetch(word, language="english")
    print(fetched)
def get_ipa(word):
    from wiktionaryparser import WiktionaryParser
    parser = WiktionaryParser()
    another_word = parser.fetch(word, 'french')
    try:
        print(another_word[0]['pronunciations']['text'][0])
    except IndexError:
        print("WIKI ERROR: THE INDEX IS OUT OF RANGE")
def get_etymology_trees(word_str: str, language: str) -> List[Word]:
    parser = WiktionaryParser()
    word_info = parser.fetch(word_str, language)
    word_origins: List[Word] = []
    for origin in word_info:
        word = Word(word_str, language)
        parse(origin['etymology'])
        # record the Word built for this origin so the function returns it
        word_origins.append(word)
    return word_origins
def requestAPI(word="test"): """ Query wiktionary API for json results using WiktionaryParser Args: word: queried word; default value, "test". Returns: word queried, and JSON table of queried word """ parser = WiktionaryParser() word_json = parser.fetch(word) return word, word_json
def get_wiktionary_entry(language, word):
    """Interface for requesting an entry from Wiktionary.

    Arguments:
        language = language of which we want the entry
        word = word of which we want the entry

    Returns:
        parsed wiktionary page
    """
    parser = WiktionaryParser()
    parser.set_default_language(language)
    return parser.fetch(word)
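# Usage sketch for get_wiktionary_entry() above; the result is the list of entry
# dicts returned by WiktionaryParser.fetch, so definitions can be walked directly.
for entry in get_wiktionary_entry('german', 'Haus'):
    for definition in entry['definitions']:
        print(definition['partOfSpeech'], definition['text'][:1])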
def _scrape_parser_info(cls, idiom_id: str) -> Optional[dict]:
    logger = logging.getLogger("_scrape_parser_1_info")
    # use the wiktionaryparser library
    parser = WiktionaryParser()
    # include alternative forms as well (e.g. beat around the bush = beat about the bush)
    parser.include_relation('alternative forms')
    try:
        idiom_info = parser.fetch(idiom_id)
    except AttributeError as ae:
        logger.warning(str(ae))
        return None
    else:
        return idiom_info
def definition_english(buff):
    from wiktionaryparser import WiktionaryParser
    import json
    parser = WiktionaryParser()
    # round-trip through JSON to get a plain, serialisable copy of the fetched data
    word = json.dumps(parser.fetch(buff), sort_keys=True, indent=4)
    data = json.loads(word)
    c = ""
    try:
        for i in data[0]["definitions"][0]["text"]:
            c += i + "\n"
        return c
    except IndexError:
        return
def define_word(word):
    parser = WiktionaryParser()
    json_word = parser.fetch(word, 'yiddish')
    d = []
    try:
        # error handling for when WiktionaryParser returns an empty list
        d = json_word[0]
        return d["definitions"]
    except Exception:
        # print("d=" + str(d))
        # print(type(d))
        # print("json_word=" + str(json_word))
        # print(type(json_word))
        return False
def get_wiki_tags(entry):
    # Mapping from Wiktionary part-of-speech labels to UPOS tags:
    # ADJ: adjective, ADP: adposition, ADV: adverb, AUX: auxiliary,
    # CCONJ: coordinating conjunction, DET: determiner, INTJ: interjection,
    # NOUN: noun, NUM: numeral, PART: particle, PRON: pronoun,
    # PROPN: proper noun, PUNCT: punctuation, SCONJ: subordinating conjunction,
    # SYM: symbol, VERB: verb, X: other
    convert = {'adjective': 'ADJ', 'adposition': 'ADP', 'preposition': 'ADP',
               'adverb': 'ADV', 'auxiliary': 'AUX', 'coordinating conjunction': 'CCONJ',
               'determiner': 'DET', 'interjection': 'INTJ', 'noun': 'NOUN',
               'numeral': 'NUM', 'particle': 'PART', 'pronoun': 'PRON',
               'proper noun': 'PROPN', 'punctuation': 'PUNCT',
               'subordinating conjunction': 'SCONJ', 'symbol': 'SYM',
               'verb': 'VERB', 'other': 'X', 'article': 'DET', 'conjunction': 'PART'}

    parser = WiktionaryParser()
    words = entry[1][0]
    wikitionary_tags = []
    for word in words:
        wiki_pos = 'X'
        try:
            results = parser.fetch(word, 'greek')
            if results and results[0]['definitions']:
                for wiki_idx in range(len(results[0]['definitions'])):
                    wiki_pos = results[0]['definitions'][wiki_idx]['partOfSpeech']
                    if wiki_pos in convert:
                        wiki_pos = convert[wiki_pos]
                        break
                    else:
                        print(f'** cannot convert wiki_pos: {wiki_pos}')
                        print(word)
                        print('--------------------------------------')
                        wiki_pos = 'X'
        except AttributeError as error:
            print(f'Error: {error}')
        wikitionary_tags.append(wiki_pos)
    return wikitionary_tags
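# Usage sketch for get_wiki_tags() above. The shape of `entry` is an assumption
# inferred from the indexing inside the function (entry[1][0] is the list of
# surface words); the example words are illustrative only.
entry = ('sentence-id', [['σπίτι', 'τρέχω']])
print(get_wiki_tags(entry))  # one UPOS-style tag per word, 'X' when no mapping is found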
class wiktionary(dictionaryAPIBaseClass):
    def __init__(self):
        self.parser = WiktionaryParser()

    def __del__(self):
        pass

    def word_definition(self, word):
        try:
            retrieved_definitions = self.parser.fetch(word)
        except Exception as e:
            print(f'An exception was raised when calling the wiki API: {e}')
            exit(1)
        definition_list = retrieved_definitions[0]['definitions'][0]['text']
        definition = definition_list[random.randrange(1, len(definition_list))]
        return definition

    def word_check(self, word):
        if not self.parser.fetch(word)[0]['definitions']:
            return False
        else:
            return True
class Wiktionary:
    def __init__(self, bot):
        self.bot = bot
        self.parser = WiktionaryParser()
        self.words = {}
        self.output = None
        self.embed = None
        self.parser.set_default_language('english')

    def __fetch_word(self, word):
        self.words = self.parser.fetch(word)

    @commands.group(brief='Gives you a word\'s etymology, definition, examples etc.')
    async def word(self, ctx):
        pass

    @word.command(brief='Changes the language the command will use.')
    async def lang(self, ctx, lang):
        self.parser.set_default_language(lang)
        language_list = 'https://en.wiktionary.org/wiki/Wiktionary:List_of_languages'
        await ctx.send(f'Language changed to {lang}.\nThe list of languages can be found here: {language_list}')

    @word.command(brief='Gives you a word\'s etymologies.', aliases=['e', 'ety'])
    async def etymology(self, ctx, word):
        self.__fetch_word(word)
        title = word
        description = f'{len(self.words)} results found.'
        self.embed = discord.Embed(color=ctx.message.author.color, title=title, description=description)
        for i, word in enumerate(self.words[:3], 1):
            self.embed.add_field(name=i, value=word['etymology'])
        await ctx.send(embed=self.embed)

    @word.command(brief='Gives you example usages for a word.', aliases=['ex'])
    async def example(self, ctx, word):
        self.__fetch_word(word)
        self.output = [str(word['definitions'][0]['examples']) for word in self.words][:3]
        print(self.output)
        await ctx.send('\n'.join(self.output))

    @word.command(brief='Gives you a word\'s definition.', aliases=['d', 'def'])
    async def definition(self, ctx, word):
        self.__fetch_word(word)
        self.output = [str(word['definitions'][0]['text']) for word in self.words][:3]
        print(self.output)
        await ctx.send('\n'.join(self.output))
def get_wiktionary_entry(language, word):
    """Interface for requesting an entry from Wiktionary.

    Arguments:
        language = language of which we want the entry
        word = word of which we want the entry

    Returns:
        parsed wiktionary page
    """
    parser = WiktionaryParser()
    parser.set_default_language(language)
    try:
        return parser.fetch(word)
    except Exception as e:
        print("problem with word {}, language {}".format(word, language))
        print(e)
        return []
def translate(word: str) -> str:
    """
    Fetch the word from Wiktionary, collect every part of speech with its
    definitions, join them into one string, and return it.
    """
    parser = WiktionaryParser()
    def_ = parser.fetch(word.lower())
    ret = ""
    for word_payload in def_:
        definitions = word_payload['definitions']
        translations = {
            d['partOfSpeech']: LINE_DIVIDER.join(d['text'])
            for d in definitions
        }
        ret += LINE_DIVIDER.join(f"{k}: {v}" for k, v in translations.items())
    return ret
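# Usage sketch for translate() above; LINE_DIVIDER is assumed to be a module-level
# separator string in the original project, so a stand-in is declared here.
LINE_DIVIDER = "\n"

if __name__ == "__main__":
    print(translate("Python"))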
def fetch_word(word):
    parser = WiktionaryParser()
    each_word = word.lower()
    word_details = parser.fetch(each_word)
    if len(word_details) and len(word_details[0].get('definitions', [])):
        word_details = word_details[0]
        word_definitions = word_details.pop('definitions')
        priority = 0
        pronounciation_details = word_details.pop('pronunciations')
        audio_links = pronounciation_details.get('audio', [])
        pronounciations = pronounciation_details.get('text', [])
        new_word = Word.objects.create(word_english=each_word,
                                       pronounciations=pronounciations,
                                       audio_links=audio_links)
        translations = word_details.pop('translations')
        for each_translation in translations:
            meaning = each_translation.get('meaning')
            for language_code in each_translation.get('available_translations'):
                for each_local_word in each_translation.get(
                        'available_translations').get(language_code):
                    Translation.objects.create(
                        english_word=new_word,
                        meaning=meaning,
                        local_word=each_local_word,
                        utf_encoded=each_local_word.encode('utf-8'),
                        language=language_code)
        for each_definition in word_definitions:
            definition_text = each_definition.get('text', [])
            part_of_speech = each_definition.get('partOfSpeech')
            examples = each_definition.get('examples')
            synonyms = []
            for each_related in each_definition.get('relatedWords'):
                if each_related.get('relationshipType', '') == 'synonyms':
                    synonyms = each_related.get('words', [])
            new_word_definition = WordDefinition.objects.create(
                english_word=new_word,
                definitions=definition_text,
                priority=priority,
                part_of_speech=part_of_speech,
                examples=examples,
                synonyms=synonyms)
            priority += 1
def parseAndReturn(word):
    parser = WiktionaryParser()
    defList = parser.fetch(word)[0]["definitions"]
    defText = []
    if len(defList) == 0:
        raise AttributeError("CANNOT FIND A DEFINITION")
    for i in range(len(defList)):
        if i >= constants.MAX_NUM_DEFINITIONS:
            break
        defText.append([defList[i]["partOfSpeech"]])
        if debug:
            print(len(defList[i]["text"]))
        for j in range(1, len(defList[i]["text"])):
            if j > constants.MAX_DEPTH_PER_DEF:
                break
            defText[i].append(defList[i]["text"][j])
    return defText
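# Usage sketch for parseAndReturn() above. The original relies on a module-level
# `debug` flag and a `constants` module providing MAX_NUM_DEFINITIONS and
# MAX_DEPTH_PER_DEF; the stand-ins below exist only to make the sketch runnable.
from types import SimpleNamespace

debug = False
constants = SimpleNamespace(MAX_NUM_DEFINITIONS=3, MAX_DEPTH_PER_DEF=2)

for definition in parseAndReturn("python"):
    print(definition)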
class WikitionaryDictionary(VimDictionary):
    def __init__(self):
        self._parser = WiktionaryParser()

    def _lookup(self, entry):
        wikitionary_result = self._parser.fetch(entry)
        result = self.parse_wikitionary_entry(entry, wikitionary_result)
        return result.strip()

    @staticmethod
    def parse_wikitionary_entry(word, wikiresult):
        lines = list()
        # lines.append(word.upper())
        for i1, entry in enumerate(wikiresult, start=1):
            lines.append(word.upper() + '\n')
            for i2, definition in enumerate(entry['definitions'], start=1):
                lines.append('{0}. {1}'.format(i2, definition['text']))
        return '\n'.join(lines)
def wiktionary_retriever(word_list, lang):
    retrieved_words = dict()
    parser = WiktionaryParser()
    for word in word_list:
        retrieved_words[word] = parser.fetch(word, lang)

    wiktionary_dict = dict()
    for k, v in retrieved_words.items():
        try:
            if len(v) != 0:
                v = v[0]
                if len(v['definitions']) != 0:
                    gender, plural = "", ""
                    # Check the information in the text field
                    if lang == "italian":
                        text = v['definitions'][0]['text'][0].replace(
                            k, "").replace("\xa0", "").split(" (plural ")
                    else:
                        text = re.split(
                            r'.*plural ',
                            v['definitions'][0]['text'][0].replace(
                                k, "").replace("\xa0", ""))
                    if len(text) == 1:
                        gender = text[0]
                    elif len(text) >= 2:
                        gender = text[0]
                        if len(text[1].split(")")[0]) > 3:
                            plural = text[1].split(")")[0].replace(")", "")
                    wiktionary_dict[k] = (clean_text(v['etymology']),
                                          v['definitions'][0]['partOfSpeech'],
                                          gender, plural)
                else:
                    wiktionary_dict[k] = []
            else:
                wiktionary_dict[k] = []
        except Exception:
            print("wiktionary err:", k)
    return wiktionary_dict
class WiktionaryDefiner(Definer):
    language: str = attr.ib()

    def __attrs_post_init__(self):
        self.parser = WiktionaryParser()
        self.entry = None

    def lookup_word(self, word):
        self.entry = self.parser.fetch(word, language=self.language)

    def get_definition(self, word: str, entry_n: int = 0, definition_n: int = 0):
        self.lookup_word(word=word)
        if self.entry == []:
            print(f"No {self.language} definition found for: '{word}'")
            return None
        entry = self.entry[entry_n]
        definitions = entry["definitions"]
        try:
            definition = definitions[definition_n]
        except IndexError:
            print(f"No definition found for: '{word}'")
            return None
        text = definition["text"]
        definition = text[1]
        return definition

    def define(self, word):
        result = self.get_definition(word)
        if result is None:
            lower = word.lower()
            if lower != word:
                result = self.get_definition(lower)
        return result
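# Usage sketch for WiktionaryDefiner above; it assumes the attrs-based Definer
# base class from the surrounding project is importable and that the class is
# decorated so `language` becomes a constructor argument.
definer = WiktionaryDefiner(language="english")
print(definer.define("Serendipity"))  # retries with the lowercase form if needed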
def get_info(w, lang=None, parser=None, debug=False, postfix=None):
    try:
        if parser is None:
            parser = WiktionaryParser()
        info = parser.fetch(w, lang)[0]['definitions']
        pos_list, morph_list = [], []
        for info_dct in info:
            pos = info_dct['partOfSpeech']
            txt = info_dct['text']
            pos_list.append(pos)
            morph_list.append(wp_morph(txt, pos))
        result_dict = {
            'w': w,
            'src': lang[:2],
            'pos_wp': pos_list,
            'morph': morph_list
        }
        return {k + postfix: v for k, v in result_dict.items()} \
            if is_(postfix) else result_dict
    except Exception:
        if debug:
            tb.print_exc()
        return {}
def new_word_db_fetch(words, wik_word_index=0, wik_nonword_index=0):
    # set up the Wiktionary parser
    wik_parser = WiktionaryParser()
    wik_parser.set_default_language('danish')
    wik_parser.RELATIONS = []
    wik_parser.PARTS_OF_SPEECH = [
        "noun", "verb", "adjective", "adverb", "proper noun"
    ]
    new_words = []
    new_nonwords = []
    for word in tqdm(words):
        try:
            data = wik_parser.fetch(word)
            if len(data) == 0:
                new_nonwords.append(word)
            else:
                new_words.append(word)
        except AttributeError:
            print("Something went wrong with finding a word on WikWord.")
            continue
    csv_append('word_datasets/wik_nonwords.csv', new_nonwords, wik_nonword_index)
    csv_append('word_datasets/wik_words.csv', new_words, wik_word_index)
    return new_words, new_nonwords
# process_ending_words = [noun for noun in oneword['word'].tolist()
#                         if noun.endswith('tion') or noun.endswith('sion') or
#                         noun.endswith('ing') or noun.endswith('age') or
#                         (noun.endswith('y') and not noun.endswith('ty')) or
#                         noun.endswith('ance') or
#                         noun.endswith('al') or noun.endswith('sis')]

# wiktionary POS analysis
section_types = []
attribute_list = []
named_system_list = []
process_verb_list = []
attribute_verb_list = []
oneword['pos'] = ''
for word in oneword['word'].tolist():
    w = parser.fetch(word)
    pos = []
    # for e in range(len(w)):  # loop over definitions for each etymology
    # use the first etymology only for now!
    if len(w) > 0:
        for d in range(len(w[0]['definitions'])):
            pos.append(w[0]['definitions'][d]['partOfSpeech'])
    oneword.loc[oneword['word'] == word, 'pos'] = ', '.join(pos).rstrip(', ')
    # if len(a) == 1 and a[0] == 'Adjective':
    #     attribute_list.append(word)
    # elif len(a) == 1 and a[0] == 'Proper noun':
    #     named_system_list.append(word)
    # elif len(a) == 1 and a[0] == 'Verb':
    #     process_verb_list.append(word)
def fetch_word(ui):
    parser = WiktionaryParser()
    parser.set_default_language('hungarian')
    text = ui.lineEdit.text()
    word = parser.fetch(text)
    return word
from wiktionaryparser import WiktionaryParser
from pprint import pprint

parser = WiktionaryParser()
word = parser.fetch('test')
pprint(word)
        raise Exception('response has an unexpected number of fields')
    if 'error' not in response:
        raise Exception('response is missing required error field')
    if 'result' not in response:
        raise Exception('response is missing required result field')
    if response['error'] is not None:
        raise Exception(response['error'])
    return response['result']


parser = WiktionaryParser()
ws = input("Enter comma separated words: ").split(',')
# parser.set_default_language('German')
for w in ws:
    word = parser.fetch(w)
    audio_file = 'a'
    audio = []
    try:
        audio.append({
            "url": "https:" + word[0]['pronunciations']['audio'][0],
            "filename": w + "-en.mp3",  # -en English suffix
            "fields": ["Audio"]
        })
    except IndexError:
        pass
    try:
        definition = word[0]['definitions'][0]
    except IndexError:
def getDefinition(word):
    parser = WiktionaryParser()
    data = parser.fetch(word)
    return data
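# Usage sketch for getDefinition() above; the return value is the list of entry
# dicts from WiktionaryParser.fetch, so the first definition can be reached as
# shown (guarded, because the list may be empty for unknown words).
data = getDefinition("test")
if data and data[0]['definitions']:
    first = data[0]['definitions'][0]
    print(first['partOfSpeech'])
    print(first['text'][0])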