def get_duden_dict(self, word):
    """Return the exported duden entry for ``word``.

    ``duden.get`` is tried first; when it yields nothing, a non-exact
    ``duden.search`` is run and its first hit is used instead.

    :param word: the word to look up
    :return: the result of ``match.export()``; ``None`` when the word is not
        found or a direct hit cannot be exported; ``{}`` when a search hit
        was found but exporting it failed (kept from the original behaviour)
    """
    self.log.debug("Looking for: %s", word)
    match = duden.get(word)

    if match is not None:
        try:
            return match.export()
        except Exception:  # duden bug
            return None

    self.log.warning("[ ? ] Word '%s' not found in duden cache", word)
    found = duden.search(word, exact=False)
    if not found:
        self.log.info("[ - ] Word '%s' not found", word)
        return None

    match = found[0]
    self.log.warning("[ < ] Word '%s' found in duden dictionary", word)
    try:
        ddict = match.export()
    except Exception:
        # Only real errors are handled here; KeyboardInterrupt/SystemExit
        # are no longer swallowed as they were by the bare ``except``.
        self.log.error("[ ! ] Something went wrong with word '%s'", word)
        return {}
    self.log.info("[ + ] Word '%s' added to global cache", word)
    return ddict
def get_synonyms(series):
    """Return the synonyms of the duden entry looked up for ``series``.

    When the lookup yields nothing, the string ``"entry not found"`` is
    returned instead.
    """
    entry = duden.get(series)
    if not entry:
        return "entry not found"
    return entry.synonyms
def main():
    """Print the ``meaning_example`` attribute of one hard-coded duden entry."""
    # find the correct url
    # get definition and examples
    entry = duden.get('einfach_einmal_simpel')
    # remove beispiel code to get the meanings???
    examples = entry.meaning_example
    print(examples)
async def _duden(self, ctx, word):
    """Search a word in the duden (German only!)"""
    # The docstring is left untouched: it may be displayed to users as the
    # command's help text, so extra documentation lives in comments.
    #
    # Looks ``word`` up with ``duden.get``. A hit is sent to ``ctx`` as an
    # embed containing the entry's string form; a miss is reported through
    # ``self.error`` on the invoking channel.
    entry = duden.get(word)
    if entry is None:  # identity check instead of ``!= None``
        await self.error(ctx.channel, word)
        return

    embed = discord.Embed(
        title=f"DudenSearch ~ {word}",
        description=f"```{entry}```",
        color=int(colors.random_color()),
    )
    await ctx.send(embed=embed)
def setUpClass(cls):
    """Load every ``*.json`` sample from ``JSON_DIR`` into ``cls.samples``.

    Each sample is stored as a ``(word_json, word_obj)`` tuple, where
    ``word_obj`` is the result of ``duden.get`` for the sample's
    ``"urlname"`` value.
    """
    cls.samples = []
    for filename in os.listdir(JSON_DIR):
        if not filename.endswith(".json"):
            continue
        full_path = os.path.join(JSON_DIR, filename)
        # Read as UTF-8 explicitly so non-ASCII characters in the samples do
        # not depend on the platform's default encoding.
        with open(full_path, "r", encoding="utf-8") as fh:
            word_json = json.load(fh)
        # The file is already closed here; the lookup does not need it.
        word_obj = duden.get(word_json["urlname"])
        cls.samples.append((word_json, word_obj))
def is_in_duden(word):
    """Report whether ``duden.get`` finds an entry for ``word``.

    :param word: the word to look up
    :return: ``1`` (after printing the word) when an entry is found, ``0``
        when none is found, and ``False`` when the lookup itself raised
    """
    try:
        word_in_duden = duden.get(word)
    except Exception:
        # Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are no longer reported as a connection problem.
        print('Connection attempt failed.')
        return False

    if word_in_duden:
        print(word)
        return 1
    return 0
def search_duden_frequency(words_inventory):
    """Return the duden ``frequency`` of every word in ``words_inventory``.

    :param words_inventory: a list of words, or a whitespace-separated string
    :return: a list with one entry per word; ``0`` is used when the looked-up
        object has no ``frequency`` attribute (e.g. nothing was found)
    """
    if not isinstance(words_inventory, list):
        words_inventory = words_inventory.split()
    words_inventory = replace_umlauts(words_inventory)

    frequency_list = []
    for w in words_inventory:
        words = duden.get(w)
        if words:
            print('Got word if: ', words)
        else:
            # Fall back to whatever get_first_result() suggests for the word.
            first_word = get_first_result(w)
            words = duden.get(first_word)
            print('Got word else: ', words)
        # getattr with a default replaces the two duplicated
        # try/except AttributeError blocks.
        frequency_list.append(getattr(words, 'frequency', 0))
    return frequency_list
def get_pos(word):
    '''Return the part of speech of ``word``.

    The direct ``duden.get`` hit is preferred. If that fails for any reason,
    ``duden.search`` is used and the text inside the first pair of
    parentheses of each result's string form is collected.

    :return: ``part_of_speech`` of the direct hit; otherwise the string form
        of a one-element list holding either the collected values or
        ``"word not found"``; or plain ``"word not found"`` when the search
        fails as well
    '''
    # The original wrapped both attempts in ``while True`` loops that always
    # returned on the first pass; they are removed along with the
    # unreachable ``break`` statements.
    try:
        return duden.get(word).part_of_speech
    except Exception:
        try:
            ws = duden.search(word)
            pos = [re.search(r'\((.*?)\)', str(entry)).group(1) for entry in ws]
        except Exception:
            return "word not found"
        return str(["word not found" if pos == [] else pos])
async def German(ctx, word: str):
    '''
    prints information on discord about a specific german word.
    TODO: implement duden's "search" module for words with no "get" URL.
    '''
    # The docstring is kept as-is because it may be shown as command help.
    #
    # Flow: direct lookup -> search fallback -> pick one (definition,
    # example) pair at random, weighted towards earlier pairs -> send it.
    duden_obj = duden.get(word)
    if duden_obj is None:
        results = duden.search(word)
        # I can add in a user check for case sensitivity here
        if len(results) == 0:
            await ctx.send(
                f'```{word} is not in Duden, ya goof.\n\tPlease remember that duden is case sensitive.```'
            )
            return
        print('else')
        duden_obj = results[0]

    def_ex = fix_meaning_overview(duden_obj)

    # prioritize more common definitions/exs
    # do not want to overwhelm users and show entire duden page
    prob = np.arange(len(def_ex), 0, -1)
    # One vectorised division instead of re-summing ``prob`` per element.
    probz = prob / prob.sum() if len(prob) else []
    try:
        def_, ex_ = def_ex[np.random.choice(np.arange(0, len(def_ex)), p=probz)]
    except ValueError:
        # Raised for an empty ``def_ex`` or a pair that does not unpack.
        await ctx.send(
            f'```Incomplete Duden entry.\n\tBedeutung: {duden_obj.meaning_overview[0]}```'
        )
        print(def_ex)
        return

    await ctx.send(
        f'```Word: {word}\n\tWortart: {duden_obj.part_of_speech}\n\tBedeutung: {def_}\n\tBeispiel: {ex_}```'
    )
def __init__(self, word_string):
    """Store ``word_string`` and fetch the matching entry from the duden.

    The lookup uses a URL-friendly spelling: surrounding whitespace is
    stripped, and umlauts and sharp s are transliterated.

    :type word_string: str
    """
    # Single-pass character table equivalent to the chained replacements.
    url_friendly = str.maketrans({
        "ä": "ae",
        "ö": "oe",
        "ü": "ue",
        "ß": "sz",
        "Ä": "Ae",
        "Ö": "Oe",
        "Ü": "Ue",
    })
    self.word_string = word_string
    self.word = duden.get(word_string.strip().translate(url_friendly))
async def duden(self, ctx: commands.Context, word):
    # Documented with comments rather than a docstring so that no help text
    # derived from the docstring changes.
    #
    # Looks ``word`` up in the duden (retrying with a capitalised spelling)
    # and sends an embed describing it. Nouns, verbs and adjectives each get
    # their own field set; everything else, and any error on the way, is
    # answered with the generic "not found" embed and a log entry.
    if not await botchannel_check(ctx):
        Bot.dispatch(self.bot, "botchannelcheck_failure", ctx)
        return

    time = datetime.datetime.now()
    user = ctx.author.name

    async def send_not_found():
        # Single implementation of the error reply that the original
        # duplicated in the "no hit" branch and in the except handler.
        embed = discord.Embed(
            title="**Fehler**",
            description=f"Das Wort ```{word}``` konnte nicht gefunden werden! \nEntweder es existiert nicht oder es gab einen Fehler bei der Suche - das kann unter bestimmten Umständen vorkommen, diese sind bekannt. Das wird im nächsten Update behoben ;)",
            colour=await get_embedcolour(ctx.message),
        )
        embed._thumbnail = await get_embed_thumbnail()
        embed._footer = await get_embed_footer(ctx)
        await ctx.send(embed=embed)
        await log(
            text=f'{time}: Der Nutzer {user} hat versucht mit dem Befehl {await get_prefix_string(ctx.message)}duden Informationen über das Wort "{word}" zu bekommen, dieses konnte aber nicht gefunden werden!',
            guildid=ctx.guild.id,
        )

    def or_dash(value):
        # The original wrote ``value if not None else "-"``, which is always
        # true, so the "-" placeholder was never used.
        return value if value else "-"

    try:
        w = duden.get(str(word))
        if not w:
            w = duden.get(str((word.lower()).capitalize()))
        if not w:
            await send_not_found()
            return

        word_type = w.part_of_speech.split(",")[0]
        if word_type == "Substantiv":
            label = "Substantiv"
        elif "Verb" in word_type:
            label = "Verb"
        elif word_type == "Adjektiv":
            label = "Adjektiv"
        else:
            # The original fell through to an unbound ``embed`` here and
            # ended up in the generic error path; this makes it explicit.
            await send_not_found()
            return

        embed = discord.Embed(
            title=f"Duden | {w.title}",
            description=f"Hier findest du alle Informationen zu dem {label} `{word}`!",
            colour=await get_embedcolour(ctx.message),
        )

        name_field = ("Name", w.name)
        frequency_field = (
            "Häufigkeit von 1-5",
            f"{await get_frequency(w.frequency, 5)} ({w.frequency})"
            if w.frequency
            else "-",
        )
        separation_field = (
            "Silbentrennung",
            or_dash(" · ".join(w.word_separation or [])),
        )
        # A missing synonym list used to raise inside ``"".join`` and turn a
        # valid entry into a "not found" reply.
        synonyms_field = (
            "Synonyme",
            "".join(w.synonyms) if w.synonyms else "-",
        )
        origin_field = ("Herkunft", or_dash(w.origin))

        if label == "Substantiv":
            pos_parts = w.part_of_speech.split(", ")
            meanings = w.meaning_overview
            if isinstance(meanings, list):
                meaning_text = ", ".join(m.replace("\n", " ") for m in meanings)
            elif meanings is None:
                meaning_text = ""
            else:
                meaning_text = str(meanings)
            fields = [
                name_field,
                ("Artikel", or_dash(w.article)),
                ("Geschlecht", pos_parts[1] if len(pos_parts) > 1 else "-"),
                frequency_field,
                ("Nutzung", or_dash(w.usage)),
                separation_field,
                ("Bedeutung", or_dash(meaning_text)),
                synonyms_field,
                origin_field,
            ]
        elif label == "Verb":
            fields = [
                name_field,
                ("Typ", str(w.part_of_speech)),
                frequency_field,
                separation_field,
                synonyms_field,
                origin_field,
            ]
        else:
            fields = [
                name_field,
                frequency_field,
                separation_field,
                synonyms_field,
                origin_field,
            ]

        for field_name, field_value in fields:
            embed.add_field(name=field_name, value=field_value, inline=False)

        embed._footer = await get_embed_footer(ctx)
        embed.set_thumbnail(
            url="https://media.discordapp.net/attachments/851853486948745246/895023337103822898/Duden_FB_Profilbild.png?width=676&height=676"
        )
        await ctx.send(embed=embed)
        await log(
            f"{time}: Der Nutzer {user} hat mit dem Befehl {await get_prefix_string(ctx.message)}"
            f'duden Informationen zum Wort "{word}" erhalten!',
            guildid=ctx.guild.id,
        )
    except Exception:
        # Best-effort command: any failure is reported to the user as
        # "not found" instead of propagating.
        await send_not_found()
def search_duden_frequency(self, words_inventory):
    """Return the duden ``frequency`` of every word in ``words_inventory``.

    Umlauts are transliterated before the lookup. When ``duden.get`` finds
    nothing, the first result of duden's online search is tried, but only if
    its link text equals the word.

    :param words_inventory: a list of words, or a whitespace-separated string
    :return: a list with one entry per word; ``0`` where no entry with a
        ``frequency`` attribute could be obtained
    """

    def get_first_result(word):
        # Return the URL name of the first search hit, or 0 when there is
        # no hit or its link text differs from ``word``.
        duden_url = 'http://www.duden.de/suchen/dudenonline/'
        response = requests.get(duden_url + word)
        soup = BeautifulSoup(response.text, 'html.parser')
        main_sec = soup.find('section', id='block-duden-tiles-0')
        if main_sec is None:
            return 0
        # The original indexed ``a_tags[0]`` directly and raised an uncaught
        # IndexError when the section contained no headings.
        first_link = next((h2.a for h2 in main_sec.find_all('h2')), None)
        if first_link is None or first_link.text != word:
            return 0
        href = first_link.get('href')
        if not href:
            return 0
        return href.split('/')[-1]

    # needed for duden module
    # NOTE(review): 'ß' is mapped to 'ss' here, as in the original; confirm
    # this matches the transliteration duden uses in its URL names.
    umlaut_table = str.maketrans({
        'ä': 'ae',
        'ö': 'oe',
        'ü': 'ue',
        'Ä': 'Ae',
        'Ö': 'Oe',
        'Ü': 'Ue',
        'ß': 'ss',
    })

    if not isinstance(words_inventory, list):
        words_inventory = words_inventory.split()
    # The table is built once instead of once per word.
    words_inventory = [word.translate(umlaut_table) for word in words_inventory]

    frequency_list = []
    for w in words_inventory:
        entry = duden.get(w)
        if not entry:
            first_word = get_first_result(w)
            # Skip the second lookup when the search found nothing; the
            # original requested the literal name ``0`` in that case.
            entry = duden.get(first_word) if first_word else None
        frequency_list.append(getattr(entry, 'frequency', 0))
    return frequency_list
def __init__(self, string):
    """Load the dictionaries and print duden data for a fixed candidate list.

    Prints a banner for ``string``, reads the two dictionaries through
    ``self.read_dict_from_file``, then looks up each hard-coded candidate
    with ``duden.get`` and prints a mapping that additionally holds, per
    found entry name, its meaning overview and synonyms.

    :param string: text shown in the banner
    """
    print('=' * 40)
    print(Style.BOLD + "Running FeatureExtractor on:", string + Style.END)
    print('-' * 40)

    try:
        self.fasttext_similar_words_dict = self.read_dict_from_file(
            DATA_RESSOURCES_PATH + AFFIXOID_DICTIONARY)
        self.empty_words_dict = self.read_dict_from_file(
            DATA_FINAL_PATH + EMPTY_WORDS_DICTIONARY)
    except FileNotFoundError:
        print('Please set correct paths for data.')

    # Every candidate gets its own empty list.
    cand = {
        name: []
        for name in (
            'Bilderbuch', 'Blitz', 'Bombe', 'Glanz', 'Heide', 'Jahrhundert',
            'Qualität', 'Schwein', 'Spitze', 'Traum', 'Apostel', 'Bolzen',
            'Dreck', 'Gott', 'Guru', 'Hengst', 'Ikone', 'König', 'Papst',
        )
    }
    c2 = cand.copy()

    for counter, key in enumerate(cand, start=1):
        print()
        print('Line:', str(counter) + ' ===============================')
        try:
            w = duden.get(key)
            c2.update({
                w.name: [{
                    'Bedeutung': w.meaning_overview
                }, {
                    'Synonyme': w.synonyms
                }]
            })
        except Exception as err:
            # Still best-effort, but the failure is reported instead of
            # vanishing, and Ctrl-C is no longer swallowed.
            print('Skipping', repr(key) + ':', err)
            continue
        print('====================')

    print(c2)
def conjugate_de(verb):
    """Build a PySimpleGUI layout with conjugation tables for ``verb``.

    The forms come from ``duden.get(verb).grammar(...)``. Six plain-text
    tables are rendered with ``tabulate`` (indicative present, indicative
    preterite, subjunctive I, subjunctive II, imperative, and an empty
    filler) and arranged in three rows of two.

    :param verb: the verb to look up with ``duden.get``
    :return: a list of layout rows; each table row is followed by a row
        holding a horizontal separator
    """
    # use duden
    w = duden.get(verb)
    # 7 sections x 7 rows (header + 6 persons) x 2 columns (pronoun, form)
    w_arr = np.full((7, 7, 2), None)
    table = []

    def render(section):
        # Replace unset cells with a blank and append the rendered table.
        # The original used ``np.where(arr is None, ...)``; ``arr is None``
        # is an identity test on the array itself and is always False, so no
        # cell was ever replaced.
        cells = [[' ' if cell is None else cell for cell in row]
                 for row in w_arr[section]]
        table.append(tabulate(cells, tablefmt='plain', numalign="center"))

    def fill_persons(section, title, forms):
        # Write the header row plus six "pronoun form" rows, then render.
        w_arr[section, 0, 0] = "{:-<18}".format('')
        w_arr[section, 0, 1] = "{:-<25}".format(title)
        for j in range(6):
            form = forms[j].split()  # personal pronoun and form
            w_arr[section, j + 1, 0] = "{:<18}".format(form[0])
            w_arr[section, j + 1, 1] = "{:<25}".format(form[1])
        render(section)

    # Infinitiv (stored in section 0, which is not rendered below)
    w_arr[0, 1, 0] = 'Infinitiv mit zu'
    w_arr[0, 1, 1] = w.grammar(duden.INFINITIV_MIT_ZU)

    prasens = w.grammar(duden.PRASENS)
    prateritum = w.grammar(duden.PRATERITUM)

    # The slicing below takes every third present-tense entry and every
    # second preterite entry for each mood.
    fill_persons(1, 'INDIKATIV' + ' ' + 'PRASENS', prasens[0::3])
    fill_persons(2, 'INDIKATIV' + ' ' + 'PRATERITUM', prateritum[0::2])
    fill_persons(3, 'KONJUNKTIV I' + ' ' + 'PRASENS', prasens[1::3])
    fill_persons(4, 'KONJUNKTIV II' + ' ' + 'PRATERITUM', prateritum[1::2])

    # Imperativ: only rows 1 and 4 are filled
    w_arr[5, 0, 0] = "{:-<18}".format('')
    w_arr[5, 0, 1] = "{:-<25}".format('IMPERATIV' + ' ' + 'PRASENS')
    imperativ = prasens[2::3]
    w_arr[5, 1, 0] = "{:-<18}".format('')
    w_arr[5, 4, 0] = "{:-<18}".format('')
    singular = imperativ[1].split()
    w_arr[5, 1, 1] = "{:<25},{}!".format(singular[0], singular[2])
    w_arr[5, 4, 1] = "{:<25}!".format(imperativ[4].split()[0])
    render(5)

    # blank column
    render(6)

    # formatted sg window: 3 rows x 2 tables, one separator row after each
    col = []
    for i in range(3):  # row
        row = []
        for j in range(2):  # column
            row.extend([
                sg.Text(table[2 * i + j], font='Courier', text_color='white'),
                sg.VSeparator(color='white')
            ])
        col.append(row)
        col.append([sg.HSeparator(color='white')])
    return col
def test_get(self):
    # The entry fetched for a word must carry that word as its title.
    expected_title = "laufen"
    entry = duden.get(expected_title)
    self.assertEqual(expected_title, entry.title)
def print_synonyms(self):
    """Print the synonyms of ``self.word``, or ``"No synonyms."``.

    The original passed ``self.word.synonyms`` back into ``duden.get``,
    i.e. it looked the synonyms up as if they were a word name, instead of
    printing them.
    """
    try:
        synonyms = self.word.synonyms
    except Exception:
        # e.g. ``self.word`` is None because the lookup found nothing
        synonyms = None

    if synonyms:
        print(synonyms)
    else:
        print("No synonyms.")