Пример #1
0
 def get_duden_dict(self, word):
     """Return the exported Duden data for ``word``.

     A direct ``duden.get`` lookup is tried first. If that yields ``None``,
     the first hit of a non-exact ``duden.search`` is used instead.

     Returns:
         The value of ``match.export()`` on success.
         ``None`` when the word cannot be found at all, or when a directly
         fetched entry fails to export.
         An empty dict when an entry found through the search fallback
         fails to export.
     """
     self.log.debug("Looking for: %s", word)
     match = duden.get(word)
     if match is not None:
         try:
             return match.export()
         except Exception:
             # duden bug: export() can fail on an entry that was found
             return None

     self.log.warning("[ ? ] Word '%s' not found in duden cache", word)
     found = duden.search(word, exact=False)
     if not found:
         self.log.info("[ - ] Word '%s' not found", word)
         return None

     match = found[0]
     self.log.warning("[ < ] Word '%s' found in duden dictionary", word)
     try:
         ddict = match.export()
     except Exception:
         self.log.error("[ ! ] Something went wrong with word '%s'", word)
         # keep the historical empty-dict result for this path
         return {}
     self.log.info("[ + ] Word '%s' added to global cache", word)
     return ddict
Пример #2
0
def get_synonyms(series):
    """Return the ``synonyms`` of the Duden entry for ``series``.

    When ``duden.get`` yields a falsy result, the string
    ``"entry not found"`` is returned instead.
    """
    entry = duden.get(series)
    if not entry:
        return "entry not found"
    return entry.synonyms
Пример #3
0
def main():
    """Print ``meaning_example`` of the Duden entry 'einfach_einmal_simpel'."""
    # The entry is addressed by its URL name.
    entry = duden.get('einfach_einmal_simpel')
    # Open question from the author: strip the example code to keep only
    # the meanings?
    print(entry.meaning_example)
Пример #4
0
 async def _duden(self, ctx, word):
     """Search a word in the duden (German only!)

     Sends an embed containing the string form of the entry, or delegates
     to ``self.error`` when ``duden.get`` returns ``None``.
     """
     entry = duden.get(word)
     # identity check: an entry object must not be able to compare equal to None
     if entry is None:
         await self.error(ctx.channel, word)
         return
     embed = discord.Embed(title=f"DudenSearch ~ {word}",
                           description=f"```{entry}```",
                           color=int(colors.random_color()))
     await ctx.send(embed=embed)
Пример #5
0
    def setUpClass(cls):
        """Build ``cls.samples`` from the JSON fixtures in ``JSON_DIR``.

        Every ``*.json`` file is parsed and paired with the result of
        ``duden.get`` for its ``"urlname"`` value, giving a list of
        ``(word_json, word_obj)`` tuples.
        """
        cls.samples = []

        # sorted(): os.listdir order is arbitrary, keep the sample order stable
        for filename in sorted(os.listdir(JSON_DIR)):
            if not filename.endswith(".json"):
                continue
            full_path = os.path.join(JSON_DIR, filename)
            # JSON is UTF-8; do not depend on the platform default encoding
            with open(full_path, "r", encoding="utf-8") as fh:
                word_json = json.load(fh)
            # the fixture file is already closed when the lookup runs
            word_obj = duden.get(word_json["urlname"])

            cls.samples.append((word_json, word_obj))
Пример #6
0
def is_in_duden(word):
    """Report whether ``duden.get`` finds an entry for ``word``.

    Returns:
        ``1`` (after printing the word) when the lookup result is truthy,
        ``0`` when it is falsy, and ``False`` (after printing a failure
        notice) when the lookup raised.
    """
    try:
        word_in_duden = duden.get(word)
    except Exception:
        # Exception instead of a bare except: Ctrl-C / SystemExit must
        # still be able to stop the program.
        print('Connection attempt failed.')
        return False
    if not word_in_duden:
        return 0
    print(word)
    return 1
def search_duden_frequency(words_inventory):
    """Collect the Duden ``frequency`` of every word in ``words_inventory``.

    Args:
        words_inventory: a list of words, or a whitespace-separated string
            that is split into words.

    Returns:
        A list with one item per word: the entry's ``frequency`` attribute,
        or ``0`` when no entry was found or the attribute is unavailable.

    ``replace_umlauts`` and ``get_first_result`` are defined outside this
    block. A falsy result from ``get_first_result`` is treated as "no
    search hit".
    """
    if not isinstance(words_inventory, list):
        words_inventory = words_inventory.split()

    words_inventory = replace_umlauts(words_inventory)
    frequency_list = []

    for w in words_inventory:
        words = duden.get(w)
        if words:
            print('Got word if: ', words)
        else:
            first_word = get_first_result(w)
            # no hit: skip the second lookup, it cannot succeed
            words = duden.get(first_word) if first_word else None
            print('Got word else: ', words)
        # getattr's default also covers an AttributeError raised on access
        frequency_list.append(getattr(words, 'frequency', 0))
    return frequency_list
Пример #8
0
def get_pos(word):
    '''Return the part of speech of ``word``.

    The ``part_of_speech`` of the direct ``duden.get`` entry is returned
    when that lookup works. Otherwise ``duden.search`` is consulted and the
    text inside the first pair of parentheses of each hit's string form is
    collected; the result is then the ``str()`` of a one-element list
    holding either those texts or ``"word not found"``. If the search
    fails as well, the plain string ``"word not found"`` is returned.
    '''
    try:
        return duden.get(word).part_of_speech
    except Exception:
        # No usable direct entry (e.g. the lookup returned None).
        try:
            hits = duden.search(word)
            pos = [re.search(r'\((.*?)\)', str(entry)).group(1) for entry in hits]
            return str(["word not found" if pos == [] else pos])
        except Exception:
            return "word not found"
Пример #9
0
async def German(ctx, word: str):
    '''
    Send information about a specific German word to the Discord context.

    The word is fetched with ``duden.get``; when that returns ``None`` the
    first ``duden.search`` hit is used, and a notice is sent if there is no
    hit at all. One (definition, example) pair from
    ``fix_meaning_overview`` is then chosen at random, with earlier pairs
    weighted more heavily, and sent together with the part of speech.
    '''

    duden_obj = duden.get(word)

    if duden_obj is None:

        results = duden.search(word)
        # A user check for case sensitivity could be added here.
        if not results:
            await ctx.send(
                f'```{word} is not in Duden, ya goof.\n\tPlease remember that duden is case sensitive.```'
            )
            return
        print('else')
        duden_obj = results[0]

    def_ex = fix_meaning_overview(duden_obj)

    # Prioritize more common definitions/examples: the first pair gets the
    # largest weight. We do not want to overwhelm users by showing the
    # entire Duden page.
    weights = np.arange(len(def_ex), 0, -1)
    # the total is computed once instead of once per element
    probz = weights / weights.sum()
    try:
        def_, ex_ = def_ex[np.random.choice(np.arange(0, len(def_ex)),
                                            p=probz)]
    except ValueError:
        # raised for an empty def_ex and for pairs that do not unpack
        await ctx.send(
            f'```Incomplete Duden entry.\n\tBedeutung: {duden_obj.meaning_overview[0]}```'
        )
        print(def_ex)
        return
    await ctx.send(
        f'```Word: {word}\n\tWortart: {duden_obj.part_of_speech}\n\tBedeutung: {def_}\n\tBeispiel: {ex_}```'
    )
Пример #10
0
    def __init__(self, word_string):
        """Keep the raw word and look it up in the duden.

        The word is stripped and its umlauts / sharp s are transliterated
        to ASCII before it is handed to ``duden.get``.

        :type word_string: str
        """
        self.word_string = word_string

        # One-pass transliteration table; none of the replacement texts
        # contains a key, so this matches replacing the keys one by one.
        transliteration = str.maketrans({
            "ä": "ae",
            "ö": "oe",
            "ü": "ue",
            "ß": "sz",
            "Ä": "Ae",
            "Ö": "Oe",
            "Ü": "Ue",
        })
        url_friendly_word = word_string.strip().translate(transliteration)

        self.word = duden.get(url_friendly_word)
Пример #11
0
 async def duden(self, ctx: commands.Context, word):
     """Reply with an embed describing ``word`` as found via ``duden.get``.

     Nouns ("Substantiv"), verbs (any type containing "Verb") and
     adjectives ("Adjektiv") each get their own set of embed fields.
     Missing values are shown as "-".

     If the bot-channel check fails, only ``botchannelcheck_failure`` is
     dispatched. In every other unsuccessful case the generic "Fehler"
     embed is sent and the attempt is logged: word not found even after
     a retry with capitalised spelling, an unsupported part of speech, or
     any exception while building or sending the reply.
     """
     if not await botchannel_check(ctx):
         Bot.dispatch(self.bot, "botchannelcheck_failure", ctx)
         return
     time = datetime.datetime.now()
     user = ctx.author.name

     async def send_not_found():
         """Send the generic error embed and log the failed lookup."""
         embed = discord.Embed(
             title="**Fehler**",
             description=f"Das Wort ```{word}``` konnte nicht gefunden werden! \nEntweder es existiert nicht oder es gab einen Fehler bei der Suche - das kann unter bestimmten Umständen vorkommen, diese sind bekannt. Das wird im nächsten Update behoben ;)",
             colour=await get_embedcolour(ctx.message),
         )
         embed._thumbnail = await get_embed_thumbnail()
         embed._footer = await get_embed_footer(ctx)
         await ctx.send(embed=embed)
         await log(
             text=f'{time}: Der Nutzer {user} hat versucht mit dem Befehl {await get_prefix_string(ctx.message)}duden Informationen über das Wort "{word}" zu bekommen, dieses konnte aber nicht gefunden werden!',
             guildid=ctx.guild.id,
         )

     def or_dash(value):
         """Return ``value``, or "-" when it is ``None`` or empty."""
         return value if value else "-"

     try:
         w = duden.get(str(word))
         if not w:
             # retry with the first letter upper-cased and the rest lower-cased
             w = duden.get(str((word.lower()).capitalize()))
             if not w:
                 await send_not_found()
                 return
         word_type = w.part_of_speech.split(",")[0]

         # Values shared by all supported word types.
         if w.frequency:
             frequency = f"{await get_frequency(w.frequency, 5)} ({w.frequency})"
         else:
             frequency = "-"
         separation = " · ".join(w.word_separation)
         synonyms = "".join(w.synonyms) if w.synonyms else "-"
         origin = or_dash(w.origin)

         if word_type == "Substantiv":
             description = f"Hier findest du alle Informationen zu dem Substantiv `{word}`!"
             meaning = w.meaning_overview
             if isinstance(meaning, list):
                 meaning = ", ".join(m.replace("\n", " ") for m in meaning)
             elif meaning is not None:
                 meaning = str(meaning)
             fields = [
                 ("Name", w.name),
                 ("Artikel", w.article),
                 ("Geschlecht", str(w.part_of_speech.split(", ")[1])),
                 ("Häufigkeit von 1-5", frequency),
                 ("Nutzung", or_dash(w.usage)),
                 ("Silbentrennung", separation),
                 ("Bedeutung", or_dash(meaning)),
                 ("Synonyme", synonyms),
                 ("Herkunft", origin),
             ]
         elif "Verb" in word_type:
             description = f"Hier findest du alle Informationen zu dem Verb `{word}`!"
             fields = [
                 ("Name", w.name),
                 ("Typ", str(w.part_of_speech)),
                 ("Häufigkeit von 1-5", frequency),
                 ("Silbentrennung", separation),
                 ("Synonyme", synonyms),
                 ("Herkunft", origin),
             ]
         elif word_type == "Adjektiv":
             description = f"Hier findest du alle Informationen zu dem Adjektiv `{word}`!"
             fields = [
                 ("Name", w.name),
                 ("Häufigkeit von 1-5", frequency),
                 ("Silbentrennung", separation),
                 ("Synonyme", synonyms),
                 ("Herkunft", origin),
             ]
         else:
             # Unsupported part of speech: same reply as for an unknown word.
             await send_not_found()
             return

         embed = discord.Embed(
             title=f"Duden | {w.title}",
             description=description,
             colour=await get_embedcolour(ctx.message),
         )
         for field_name, field_value in fields:
             embed.add_field(name=field_name, value=field_value, inline=False)
         embed._footer = await get_embed_footer(ctx)
         embed.set_thumbnail(
             url="https://media.discordapp.net/attachments/851853486948745246/895023337103822898/Duden_FB_Profilbild.png?width=676&height=676"
         )
         await ctx.send(embed=embed)
         await log(
             f"{time}: Der Nutzer {user} hat mit dem Befehl {await get_prefix_string(ctx.message)}"
             f'duden Informationen zum Wort "{word}" erhalten!',
             guildid=ctx.guild.id,
         )
     except Exception:
         # Command boundary: any failure above ends in the generic error reply.
         await send_not_found()
    def search_duden_frequency(self, words_inventory):
        """Collect the Duden ``frequency`` of every word in ``words_inventory``.

        Args:
            words_inventory: a list of words, or a whitespace-separated
                string that is split into words.

        Returns:
            A list with one item per word: the entry's ``frequency``
            attribute, or ``0`` when no entry was found or the attribute
            is unavailable.
        """
        if not isinstance(words_inventory, list):
            words_inventory = words_inventory.split()

        def get_first_result(word):
            """Return the URL name of the first search hit titled ``word``, else 0."""
            duden_url = 'http://www.duden.de/suchen/dudenonline/'
            r = requests.get(duden_url + word)
            soup = BeautifulSoup(r.text, 'html.parser')
            try:
                main_sec = soup.find('section', id='block-duden-tiles-0')
                first_link = main_sec.find_all('h2')[0].a
                if first_link.text == word:
                    return first_link.get('href').split('/')[-1]
            except (AttributeError, IndexError):
                # result section missing, no <h2> hit, or a hit without a link
                pass
            return 0

        # needed for duden module
        umlaut_table = str.maketrans({
            'ä': 'ae',
            'ö': 'oe',
            'ü': 'ue',
            'Ä': 'Ae',
            'Ö': 'Oe',
            'Ü': 'Ue',
            'ß': 'ss',
        })

        def replace_umlauts(word_list):
            """Transliterate umlauts and sharp s in a string or a list of strings."""
            if isinstance(word_list, list):
                return [word.translate(umlaut_table) for word in word_list]
            if isinstance(word_list, str):
                return word_list.translate(umlaut_table)
            print('Replace Umlauts works only on strings and lists')

        words_inventory = replace_umlauts(words_inventory)
        frequency_list = []

        for w in words_inventory:
            words = duden.get(w)
            if not words:
                first_word = get_first_result(w)
                # 0 means "no usable hit": skip the second lookup
                words = duden.get(first_word) if first_word else None
            # getattr's default also covers an AttributeError raised on access
            frequency_list.append(getattr(words, 'frequency', 0))

        return frequency_list
    def __init__(self, string):
        """Load the word dictionaries and print Duden data for fixed candidates.

        Prints a banner containing ``string`` and reads two dictionaries via
        ``self.read_dict_from_file``. A ``FileNotFoundError`` there only
        prints a hint, so the attributes may stay unset. Then each candidate
        word is looked up with ``duden.get``. Its meaning overview and
        synonyms are stored under the entry's ``name`` in a copy of the
        candidate dict, which is printed at the end. A failing lookup is
        reported and skipped.
        """
        print('=' * 40)
        print(Style.BOLD + "Running FeatureExtractor on:", string + Style.END)
        print('-' * 40)

        try:
            self.fasttext_similar_words_dict = self.read_dict_from_file(
                DATA_RESSOURCES_PATH + AFFIXOID_DICTIONARY)
            self.empty_words_dict = self.read_dict_from_file(
                DATA_FINAL_PATH + EMPTY_WORDS_DICTIONARY)

        except FileNotFoundError:
            print('Please set correct paths for data.')

        # each candidate gets its own empty list
        cand = {
            name: []
            for name in (
                'Bilderbuch',
                'Blitz',
                'Bombe',
                'Glanz',
                'Heide',
                'Jahrhundert',
                'Qualität',
                'Schwein',
                'Spitze',
                'Traum',
                'Apostel',
                'Bolzen',
                'Dreck',
                'Gott',
                'Guru',
                'Hengst',
                'Ikone',
                'König',
                'Papst',
            )
        }

        c2 = cand.copy()
        for counter, key in enumerate(cand, start=1):
            print()

            print('Line:', str(counter) + ' ===============================')
            try:
                w = duden.get(key)
                bedeutungen = w.meaning_overview
                synonyme = w.synonyms
                c2[w.name] = [{
                    'Bedeutung': bedeutungen
                }, {
                    'Synonyme': synonyme
                }]
                print('====================')
            except Exception as err:
                # best effort: report the failing word and keep going
                print('Lookup failed for', key + ':', err)

        print(c2)
Пример #14
0
def _blank_missing(section):
    """Return a copy of a 2-D object array with each ``None`` cell replaced by ``' '``."""
    return np.array(
        [[' ' if cell is None else cell for cell in row] for row in section],
        dtype=object,
    )


def conjugate_de(verb):
    """Build the PySimpleGUI layout showing the conjugation of ``verb``.

    The forms come from ``duden.get(verb).grammar(...)``. Six plain-text
    tables (Indikativ Präsens, Indikativ Präteritum, Konjunktiv I,
    Konjunktiv II, Imperativ and one blank filler) are rendered with
    ``tabulate`` and arranged in three rows of two ``sg.Text`` elements.

    Returns:
        A list of layout rows: each row of two tables is followed by a row
        holding a horizontal separator.
    """
    # use duden
    w = duden.get(verb)

    # 7 sections x 7 rows (header + 6 persons) x 2 columns (pronoun, form)
    w_arr = np.full((7, 7, 2), None)

    # Infinitiv: kept in section 0, which is not rendered below
    w_arr[0, 1, 0] = 'Infinitiv mit zu'
    w_arr[0, 1, 1] = w.grammar(duden.INFINITIV_MIT_ZU)

    prasens = w.grammar(duden.PRASENS)
    prateritum = w.grammar(duden.PRATERITUM)

    def render(section):
        """Render one section as plain text, with empty cells blanked out."""
        # The previous ``np.where(arr is None, ...)`` compared the array
        # object itself with None and therefore never replaced anything.
        return tabulate(_blank_missing(w_arr[section]),
                        tablefmt='plain',
                        numalign="center")

    # (section index, header, forms of the six persons)
    person_sections = (
        (1, 'INDIKATIV PRASENS', prasens[0::3]),
        (2, 'INDIKATIV PRATERITUM', prateritum[0::2]),
        (3, 'KONJUNKTIV I PRASENS', prasens[1::3]),
        (4, 'KONJUNKTIV II PRATERITUM', prateritum[1::2]),
    )

    table = []
    for section, title, forms in person_sections:
        w_arr[section, 0, 0] = "{:-<18}".format('')
        w_arr[section, 0, 1] = "{:-<25}".format(title)
        for j in range(6):
            form = forms[j].split()
            # personal pronoun and verb form
            w_arr[section, j + 1, 0] = "{:<18}".format(form[0])
            w_arr[section, j + 1, 1] = "{:<25}".format(form[1])
        table.append(render(section))

    # Imperativ: only rows 1 and 4 carry a form
    imperativ = prasens[2::3]
    w_arr[5, 0, 0] = "{:-<18}".format('')
    w_arr[5, 0, 1] = "{:-<25}".format('IMPERATIV PRASENS')
    w_arr[5, 1, 0] = "{:-<18}".format('')
    w_arr[5, 4, 0] = "{:-<18}".format('')
    parts = imperativ[1].split()
    w_arr[5, 1, 1] = "{:<25},{}!".format(parts[0], parts[2])
    w_arr[5, 4, 1] = "{:<25}!".format(imperativ[4].split()[0])
    table.append(render(5))

    # blank filler so the 3 x 2 grid is complete
    table.append(render(6))

    # formatted sg window
    col = []
    tables = iter(table)
    for _ in range(3):  # row
        row = []
        for _ in range(2):  # column
            row.extend([
                sg.Text(next(tables), font='Courier', text_color='white'),
                sg.VSeparator(color='white')
            ])
        col.append(row)
        col.append([sg.HSeparator(color='white')])

    return col
Пример #15
0
 def test_get(self):
     """The entry fetched for "laufen" carries that word as its title."""
     expected_title = "laufen"
     entry = duden.get(expected_title)
     self.assertEqual(expected_title, entry.title)
Пример #16
0
 def print_synonyms(self):
     """Print the synonyms of ``self.word``, or "No synonyms." if there are none.

     The synonyms are printed directly. They used to be passed to
     ``duden.get`` as if they were a word to look up.
     NOTE(review): assumes ``self.word`` is a Duden entry exposing
     ``synonyms`` -- confirm against the constructor.
     """
     try:
         synonyms = self.word.synonyms
     except Exception:
         # e.g. self.word is None because the lookup found nothing
         synonyms = None
     if synonyms:
         print(synonyms)
     else:
         print("No synonyms.")