Example #1
    def query(self, word: str):
        try:
            content = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        content = json.loads(content)

        try:
            # Get the first definition string from JSON.
            definition = content['en'][0]['definitions'][0]['definition']
        except KeyError:
            # The API can return JSON that does not contain the 'en' language.
            raise NotFoundError(word)
        else:
            # Clean the definition string from HTML tags.
            definition = BeautifulSoup(definition, "html.parser").text
            content = {'definition': definition}

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

        return record
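
For reference, a minimal standalone sketch of what this provider ends up storing in Record.content. The payload below is invented for illustration; only the key layout ('en' → 'definitions' → 'definition') comes from the code above.

import json

from bs4 import BeautifulSoup

# Hypothetical API payload, trimmed to the fields the code above reads.
raw = json.dumps({
    'en': [
        {'definitions': [
            {'definition': '<i>A round fruit</i> of the apple tree.'},
        ]},
    ],
})

content = json.loads(raw)
definition = content['en'][0]['definitions'][0]['definition']
definition = BeautifulSoup(definition, "html.parser").text
print(json.dumps({'definition': definition}))
# {"definition": "A round fruit of the apple tree."}
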
Example #2
    def query(self, word: str):
        webpage = self._get_raw(word)
        data = bs4.BeautifulSoup(webpage, "html.parser")
        content = {}

        # Please bump the version if the format changes again.
        # The `show` function behaves according to the version number.

        content['version'] = 2

        # Here are the details of each version.
        #
        # The original format, from before there was any concept of a
        # version number:
        # content = {
        #     'word': ...,
        #     'pronounce': ...,
        #     'sound': (optional),
        #     'explain': [...],
        #     'verbose': [...],
        # }
        #
        # Version 2, from when the Yahoo dictionary content was provided
        # by Dy.eye:
        # content = {
        #     'version': 2,
        #     'summary': {
        #         'word': ...,
        #         'pronounce': [('KK', '...'), (...)],  // optional.
        #                                               // e.g. 'google'
        #         'explain': [(optional)],  # 'hospitalized' is summary-only
        #         'grammar': [(optional)],
        #     },
        #     'explain': [...],
        #     'verbose': [(optional)],
        # }

        # Construct summary (required)
        try:
            content['summary'] = self.parse_summary(data, word)
        except AttributeError:
            raise NotFoundError(word)

        # Handle explain (required)
        try:
            content['explain'] = self.parse_explain(data)
        except IndexError:
            raise NotFoundError(word)

        # Extract verbose (optional)
        content['verbose'] = self.parse_verbose(data)

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
        return record
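
The version comment above describes the layout the `show` side has to understand. Below is an illustrative version-2 content dict: all values are made up, the 'summary' shape mirrors what parse_summary returns (see Examples #9 and #12 below), and the exact shapes of 'explain' and 'verbose' depend on parse_explain/parse_verbose, which are not shown here.

import json

# Illustrative version-2 content, following the schema described in the
# comment in Example #2.  All values are invented.
content = {
    'version': 2,
    'summary': {
        'word': 'metadata',
        'pronounce': [('KK', '[phonetic]')],  # optional
        'explain': [('pos', 'n.'), ('explain', 'data about data')],
        'grammar': [],                        # optional
    },
    'explain': ['...'],   # filled by parse_explain (not shown here)
    'verbose': [],        # optional, filled by parse_verbose
}
print(json.dumps(content))  # this JSON string becomes Record.content
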
Example #3
    def query(self, word: str):
        try:
            content = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        content = json.loads(content)

        try:
            # Get the list of definitions for the 'en' language.
            content = content['en']
        except KeyError:
            # The API can return JSON that does not contain the 'en' language.
            raise NotFoundError(word)

        # Define a list that will be used to create a Record.
        r_content = []

        # For every part of speech, append a corresponding entry to r_content.
        for i, d in enumerate(content):
            # Record which part of speech the current definitions refer to.
            r_content.append({'part_of_speech': d['partOfSpeech']})

            # Create a list that will store the definitions
            # of the current part of speech.
            r_content[i]['definitions'] = []

            for j, d2 in enumerate(d['definitions']):
                # Parse definition and append definitions list.
                definition = BeautifulSoup(d2['definition'],
                                           "html.parser").text
                r_content[i]['definitions'].append({'definition': definition})

                # If the API provides examples for the current definition,
                # collect them into a new list.
                if 'examples' in d2:
                    r_content[i]['definitions'][j]['examples'] = []
                    for ex in d2['examples']:
                        ex = BeautifulSoup(ex, "html.parser").text
                        r_content[i]['definitions'][j]['examples'].append(ex)

        record = Record(
            word=word,
            content=json.dumps(r_content),
            source=self.provider,
        )

        return record
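
Unlike Example #1, this variant keeps every part of speech. A sketch of the resulting r_content, with invented values; the nesting mirrors the append() calls above.

import json

# Illustrative r_content for Example #3; the values are invented and the
# key layout mirrors the code above.
r_content = [
    {
        'part_of_speech': 'noun',
        'definitions': [
            {
                'definition': 'A round fruit of the apple tree.',
                'examples': ['She ate an apple.'],
            },
            {'definition': 'The tree bearing such fruit.'},
        ],
    },
]
print(json.dumps(r_content))
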
Example #4
    def query(self, word: str):
        content = self._get_raw(word)
        content_json = json.loads(content)
        if not content_json['data']:
            raise NotFoundError(word)

        record = Record(
            word=word,
            content=content,
            source=self.provider,
        )
        return record
Example #5
    def query(self, word: str):
        try:
            content = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        content_json = json.loads(content)

        status = content_json.get('code')
        if status != 200:
            # https://tech.yandex.com/translate/doc/dg/reference/translate-docpage/#codes
            message = self.status_code.get(
                status, 'Something went wrong with Yandex')
            print('Yandex: ' + message)
            raise NotFoundError(word)

        record = Record(
            word=word,
            content=content,
            source=self.provider,
        )
        return record
Example #6
    def query(self, word: str):
        content = self._get_raw(word)

        if "no_results" in content:
            raise NotFoundError(word)

        record = Record(
            word=word,
            content=content,
            source=self.provider,
        )

        return record
Example #7
    def query(self, word: str):
        try:
            content = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        record = Record(
            word=word,
            content=content,
            source=self.provider,
        )

        return record
Example #8
    def query(self, word: str):
        content_str = self._get_raw(word)
        content_dict = json.loads(content_str)

        if not content_dict['list']:
            raise NotFoundError(word)

        record = Record(
            word=word,
            content=content_str,
            source=self.provider,
        )

        return record
Example #9
    def parse_summary(self, data, word):
        def get_explain(e: bs4.element.Tag):
            def f(ks):
                return ('pos' if 'pos_button' in ks else
                        'explain' if 'dictionaryExplanation' in ks else '?')

            return [(f(m.attrs['class']), m.text) for n in e.select('ul > li')
                    for m in n.select('div')]

        def get_pronounce(p: bs4.element.Tag):
            return list(
                map(lambda x: re.match(r'(.*)(\[.*\])', x).groups(),
                    p.find('ul').text.strip().split()))

        def get_grammar(d: bs4.element.Tag):
            s = ('div#web ol.searchCenterMiddle '
                 'div.dictionaryWordCard > ul > li')
            return list(map(text, d.select(s)))

        node = data.select_one('div#web ol.searchCenterMiddle')
        if node is None:  # no dictionary card at all
            raise NotFoundError(word)
        node = node.select('div.sys_dict_word_card > div.grp-main > div')

        p = None  # optional
        if len(node) <= 1:  # e.g. "fabor"
            raise NotFoundError(word)
        elif len(node) == 2:  # e.g. "apples"
            w, e = node
        elif len(node) == 3:  # e.g. ?
            w, _, e = node
        elif len(node) == 4:  # e.g. ?
            w, _, _, e = node
        elif len(node) == 5:  # e.g. "metadata"
            w, p, _, _, e = node
        elif len(node) == 6:
            w, p, _, _, _, e = node
        else:  # unexpected layout
            raise NotFoundError(word)

        return {
            'word': w.find('span').text.strip(),
            'pronounce': get_pronounce(p) if p else [],  # optional
            'explain': get_explain(e),
            'grammar': get_grammar(data),  # optional
        }
Example #10
    def query(self, word: str):
        try:
            app_id, app_key = self._get_app_key()
            content = self._get_raw(word,
                                    headers={
                                        'app_id': app_id,
                                        'app_key': app_key
                                    })
        except QueryError as exception:
            msg = self.status_code.get(exception.status_code,
                                       'Something went wrong')
            self.color.print('Oxford: ' + msg, 'red')
            raise NotFoundError(exception.word)

        record = Record(
            word=word,
            content=content,
            source=self.provider,
        )
        return record
Example #11
    def query(self, word: str):
        requests.packages.urllib3.disable_warnings()
        content = self._get_raw(word, verify=False)

        data = {"title": word, "sources": defaultdict(list)}
        soup = BeautifulSoup(content, "html.parser")
        for tr in soup.find_all("tr", {"class": "dash"}):
            source = (
                tr.find("td", attrs={"class": "sourceW"}).find("a").text
            ).strip()
            en = tr.find("td", attrs={"class": "ennameW"}).text.strip()
            zhtw = tr.find("td", attrs={"class": "zhtwnameW"}).text.strip()
            data["sources"][source].append((en, zhtw))

        if len(data["sources"]) == 0:
            raise NotFoundError(word)

        record = Record(
            word=word, content=json.dumps(data), source=self.provider
        )
        return record
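
This provider groups (English, Traditional Chinese) term pairs by source. A tiny sketch of the payload, with an invented source name and terms, matching the defaultdict(list) layout built above.

import json
from collections import defaultdict

# Illustrative payload; the source name and terms are invented, the
# layout matches the defaultdict(list) built above.
data = {"title": "cache", "sources": defaultdict(list)}
data["sources"]["some-glossary"].append(("cache", "快取"))
data["sources"]["some-glossary"].append(("cache memory", "快取記憶體"))
print(json.dumps(data, ensure_ascii=False))
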
Example #12
    def parse_summary(self, data, word):
        def gete(x: 'bs4 node'):
            def f(ks):
                return ('pos' if 'pos_button' in ks else
                        'explain' if 'dictionaryExplanation' in ks else '?')

            return [(f(m.attrs['class']), m.text) for n in x.select('ul > li')
                    for m in n.select('div')]

        def getp(p):
            return list(
                map(lambda x: re.match(r'(.*)(\[.*\])', x).groups(),
                    p.find('ul').text.strip().split()))

        def getg(d):
            s = ('div#web ol.searchCenterMiddle '
                 'div.dictionaryWordCard > ul > li')
            return list(map(text, d.select(s)))

        node = data.select_one('div#web ol.searchCenterMiddle > li > div')
        if node is None:  # no dictionary card at all
            raise NotFoundError(word)
        node = node.select('> div')

        p = None  # optional
        if len(node) == 6:  # e.g. "metadata"
            _, w, p, _, _, e = node
        elif len(node) == 5:
            _, w, p, _, e = node
        elif len(node) == 4:  # e.g. "hold on"
            _, w, _, e = node
        elif len(node) == 3:  # e.g. "google"
            _, w, e = node
        else:  # e.g. "fabor", or an unexpected layout
            raise NotFoundError(word)

        return {
            'word': w.find('span').text.strip(),
            'pronounce': getp(p) if p else [],  # optional
            'explain': gete(e),
            'grammar': getg(data),  # optional
        }
Example #13
    def query(self, word: str):
        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")
        content = {}

        card = data.find('div', attrs={'class': 'card'})
        if card is None:
            raise NotFoundError(word)

        entry = card.find(
            # just get the first one
            attrs={'class': 'dictionary-entry'})

        if not entry:
            raise NotFoundError(word)

        content['explains'] = []

        # The word may exist in both English & Spanish
        word_element = (card.find(attrs={'id': 'headword-en'})
                        or card.find(attrs={'id': 'headword-es'}))
        if word_element is None:
            raise NotFoundError(word)
        content['word'] = word_element.text

        pattern1 = {'class': 'dictionary-neodict-indent-1'}
        pattern2 = {'class': 'dictionary-neodict-indent-2'}
        pattern3 = {'class': 'dictionary-neodict-indent-3'}
        pattern_order = {'class': 'dictionary-neodict-translation'}
        pattern_example = {'class': 'dictionary-neodict-example'}
        pattern1_en = {'class': 'dictionary-neoharrap-indent-1'}
        pattern2_en = {'class': 'dictionary-neoharrap-indent-2'}
        pattern_order_en = {'class': 'dictionary-neoharrap-translation'}

        speeches = card.find_all(attrs={'class': 'part_of_speech'})

        for (speech, category) in zip(
                speeches,
                entry.find_all(attrs=pattern1)
                or entry.find_all(attrs=pattern1_en)):
            result = []
            content['explains'].append([speech.text, result])
            context = category.find(attrs={'class': 'context'}).text
            explains = []

            for explain in (category.find_all(attrs=pattern2)
                            or category.find_all(attrs=pattern2_en)):

                orders = (explain.find_all(attrs=pattern_order)
                          or explain.find_all(attrs=pattern_order_en))

                if orders:
                    # e.g.
                    #
                    #   ('a. forgiveness', 'b. pardon (law)')
                    #
                    indices = tuple(
                        map(lambda x: x.text.replace('\xa0', ' ').strip(),
                            orders))
                else:
                    continue

                examples = explain.find_all(attrs=pattern3)

                for (example, index) in zip(examples, indices):
                    t = tuple(example.find(attrs=pattern_example))
                    (spanish, english) = (t[0].text, t[2].text)
                    explains.append((index, spanish, english))

                if (not examples) and (len(indices) > 0):
                    for index in indices:
                        explains.append((index, ))

            result.append([context, explains])

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

        return record
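
The nested lists built above are easier to read with a concrete shape in mind. A sketch of content for a single part of speech: the index labels are taken from the comment above, the rest is invented, and the nesting follows the append() calls.

import json

# Illustrative content for Example #13; values are invented, nesting
# follows the append() calls above.
content = {
    'word': 'perdón',
    'explains': [
        ['masculine noun', [
            ['(act of forgiving)', [
                ('a. forgiveness', 'Te pido perdón.',
                 'I ask for your forgiveness.'),
                ('b. pardon (law)',),
            ]],
        ]],
    ],
}
print(json.dumps(content, ensure_ascii=False))
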
Example #14
    def query(self, word: str):
        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")
        content = {}

        # handle record.word
        try:
            content['word'] = data.find('span', id='term').text
        except AttributeError:
            raise NotFoundError(word)

        # handle pronounce
        pronu_value = data.find('span', id='pronunciation_pos').text
        if pronu_value:
            content['pronounce'] = []
            for match in re.finditer(r'(\w+)(\[.*?\])', pronu_value):
                content['pronounce'].append(match.group(1, 2))

        # handle sound
        proun_sound = data.find(
            'span',
            style="display: none;",
            id="iconStyle",
            class_="tri",
            title="http://product.dreye.com.tw/",
        )
        if proun_sound:
            content['sound'] = {}
            d = json.loads(proun_sound.text)

            sound_types_and_urls = (d.get('sound_url_1', []) +
                                    d.get('sound_url_2', []))
            sound_accents = (d.get('sound_type_1', []) +
                             d.get('sound_type_2', []))

            for sound_type_and_url, sound_accent in zip(
                    sound_types_and_urls, sound_accents):
                if sound_type_and_url:
                    sound_type, sound_url = list(sound_type_and_url.items())[0]
                    content['sound'].setdefault(sound_type, {}).setdefault(
                        sound_accent, []).append(sound_url)

        # Handle explain
        main_explanations = data.find(
            class_='dd algo explain mt-20 lst DictionaryResults')
        if main_explanations:
            main_explanations = itertools.zip_longest(
                main_explanations.find_all(class_='compTitle mb-10'),
                main_explanations.find_all(
                    class_='compArticleList mb-15 ml-10', ))
        else:
            main_explanations = ""

        content['explain'] = []
        for part_of_speech, meaning in main_explanations:
            node = [part_of_speech.text] if part_of_speech else ['']

            for item in meaning.find_all('li', class_='ov-a'):
                pack = [item.find('h4').text]

                for example in (
                        tag for tag in item.find_all('span')
                        if 'line-height: 17px;' not in tag.get('style', {})):
                    sentence = ''

                    for w in example.contents:
                        if w.name == 'b':
                            sentence += '*' + w.text + '*'
                        else:
                            try:
                                sentence += w
                            except Exception:
                                pass

                    pack.append((sentence.strip()))
                node.append(pack)
            content['explain'].append(node)

        # verbose info
        part_of_speech_list, meaning_list = [], []
        content['verbose'] = []

        variation_explanations = data.find(
            class_='dd algo variation fst DictionaryResults')
        if variation_explanations:
            part_of_speech_list.extend(
                variation_explanations.find_all(class_='compTitle'))
            meaning_list.extend(
                variation_explanations.find_all(class_='compArticleList'))

        additional_explanations = data.find(
            class_='dd algo othersNew lst DictionaryResults')
        if additional_explanations:
            part_of_speech_list.extend(
                additional_explanations.find_all(class_='compTitle mt-26'))
            meaning_list.extend(
                additional_explanations.find_all(class_='compArticleList'))

        more_explanations = itertools.zip_longest(part_of_speech_list,
                                                  meaning_list)

        for part_of_speech, meaning in more_explanations:
            node = [part_of_speech.text] if part_of_speech else ['']

            if meaning:
                for item in meaning.find_all('li', class_='ov-a'):
                    pack = [item.find('h4').text]

                    for example in (
                            tag for tag in item.find_all('span')
                            if 'line-height: 17px;' not in tag['style']):
                        sentence = ''

                        for w in example.contents:
                            if w.name == 'b':
                                sentence += '*' + w.text + '*'
                            else:
                                try:
                                    sentence += w
                                except Exception:
                                    pass

                        pack.append(sentence.strip())
                    node.append(pack)
            content['verbose'].append(node)

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
        return record
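
The sentence-assembly loop above is written out twice, once for 'explain' and once for 'verbose'. A possible refactoring, sketched with a hypothetical helper name (this is a suggestion, not the project's actual code), would pull it into a single function that both loops call.

from bs4 import BeautifulSoup


def build_sentence(example):
    """Flatten one example <span>, marking <b> fragments with asterisks.

    Hypothetical helper; it mirrors the inner loop used for both
    'explain' and 'verbose' in Example #14.
    """
    sentence = ''
    for node in example.contents:
        if node.name == 'b':
            sentence += '*' + node.text + '*'
        else:
            try:
                sentence += node
            except Exception:  # non-text node, e.g. a nested tag
                pass
    return sentence.strip()


span = BeautifulSoup('<span>I <b>like</b> apples.</span>',
                     'html.parser').span
print(build_sentence(span))  # I *like* apples.
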
Example #15
    def query(self, word: str):
        r = requests.post(
            self.POST_API,
            json={
                "c": "1",
                "t": "all",
                "q": word,
            },
        )
        content = self._get_raw(word, cookies=r.cookies)

        data = {
            "title": word,
            "exact_sources": defaultdict(list),
            "fuzzy_sources": defaultdict(list),
        }
        soup = BeautifulSoup(content, "html.parser")

        # Exact matching
        exact = soup.find(id="accordion_cross")
        if exact:
            for div in exact.find_all("div", {"class": "panel"}):
                title = div.find("div", {"class": "title"}).find("a").text
                defs = [
                    div.find("strong", {
                        "class": "word"
                    }).find_all("span")[-1].text.strip()
                ]
                source = div.find("strong", {"class": "race"}).text

                for i in div.find_all("li")[1:]:
                    d = i.find("strong", {"class": "word"})
                    if d:
                        defs.append(d.find_all("span")[-1].text.strip())

                link = (self.BASE_URL +
                        div.find("a", {"class": "btn-more"})["href"])
                data["exact_sources"][source] = {
                    "title": title,
                    "defs": defs,
                    "link": link,
                }

        # Fuzzy matching
        fuzzy = soup.find(id="accordion")
        if fuzzy:
            for div in fuzzy.find_all("div", {"class": "panel"}):
                title = div.find("div", {"class": "title"}).find("a").text
                defs = [
                    div.find("strong", {
                        "class": "word"
                    }).find_all("span")[-1].text.strip()
                ]
                source = div.find("strong", {"class": "race"}).text

                for i in div.find_all("li")[1:]:
                    d = i.find("strong", {"class": "word"})
                    if d:
                        defs.append(d.find_all("span")[-1].text.strip())

                link = (self.BASE_URL + div.find("div", {
                    "class": "title"
                }).find("a")["href"])
                data["fuzzy_sources"][source].append({
                    "title": title,
                    "defs": defs,
                    "link": link
                })

        if not exact and not fuzzy:
            raise NotFoundError(word)

        record = Record(
            word=word,
            content=json.dumps(data),
            source=self.provider,
        )
        return record
Example #16
    def query(self, word: str):
        webpage = self._get_raw(word)
        soup = BeautifulSoup(webpage, "html.parser")
        response = json.loads(soup.text)

        # Not Found
        if not response.get("列表"):
            raise NotFoundError(word)

        # Show the Chinese word from iTaigi instead of user input if possible
        with suppress(KeyError, IndexError):
            word = response["列表"][0]["外語資料"]

        content = {}

        # Fetch basic words with text, pronounce and sentence
        try:
            basic_words = response["列表"][0]["新詞文本"]
        except Exception:
            raise
        else:
            content['basic_words'] = []
            for basic_word in basic_words:
                d = {}

                text = self._get_word_text(basic_word)
                d['text'] = text

                pronounce = self._get_word_pronounce(basic_word)
                d['pronounce'] = pronounce

                if self.args.verbose:
                    sentences = self._get_word_sentences(text, pronounce)
                    d['sentences'] = sentences

                content['basic_words'].append(d)

            # Fix issue-452 for the iTaigi tests.
            # iTaigi returns basic_words in random order.
            # Since we store basic_words in a list, we have to sort it
            # before saving it into the database, or the unit tests
            # would fail.
            content['basic_words'].sort(key=lambda word: word['text'])

        # Fetch related words
        try:
            related_words = response["其他建議"]
        except Exception:
            raise
        else:
            content['related_words'] = []
            for related_word in related_words:
                d = {}

                text = self._get_word_text(related_word)
                d['text'] = text

                pronounce = self._get_word_pronounce(related_word)
                d['pronounce'] = pronounce

                if self.args.verbose:
                    sentences = self._get_word_sentences(text, pronounce)
                    d['sentences'] = sentences

                content['related_words'].append(d)

        # Save content with word and provider.
        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

        return record
Example #17
    def query(self, word: str):
        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")
        content = {}

        # handle record.word
        try:
            content['word'] = data.find('span', id='term').text
        except AttributeError:
            raise NotFoundError(word)

        # handle pronounce
        pronu_value = data.find('span', id='pronunciation_pos').text
        if pronu_value:
            content['pronounce'] = []
            for match in re.finditer(r'(\w+)(\[.*?\])', pronu_value):
                content['pronounce'].append(match.group(1, 2))

        # handle sound
        pronu_sound = data.find(class_='proun_sound')
        if pronu_sound:
            content['sound'] = [
                ('mp3',
                 pronu_sound.find(class_='source',
                                  attrs={
                                      'data-type': 'audio/mpeg'
                                  }).attrs['data-src']),
                ('ogg',
                 pronu_sound.find(class_='source',
                                  attrs={
                                      'data-type': 'audio/ogg'
                                  }).attrs['data-src']),
            ]

        # Handle explain
        main_explanations = data.find(
            class_='dd algo explain mt-20 lst DictionaryResults')
        if main_explanations:
            main_explanations = itertools.zip_longest(
                main_explanations.find_all(class_='compTitle mb-10'),
                main_explanations.find_all(
                    class_='compArticleList mb-15 ml-10', ))
        else:
            main_explanations = ""

        content['explain'] = []
        for part_of_speech, meaning in main_explanations:
            node = [part_of_speech.text] if part_of_speech else ['']

            for item in meaning.find_all('li', class_='ov-a'):
                pack = [item.find('h4').text]

                for example in (tag for tag in item.find_all('span')
                                if 'line-height: 17px;' not in tag['style']):
                    sentence = ''

                    for w in example.contents:
                        if w.name == 'b':
                            sentence += '*' + w.text + '*'
                        else:
                            try:
                                sentence += w
                            except Exception:
                                pass

                    pack.append((sentence.strip()))
                node.append(pack)
            content['explain'].append(node)

        # verbose info
        part_of_speech_list, meaning_list = [], []
        content['verbose'] = []

        variation_explanations = data.find(
            class_='dd algo variation fst DictionaryResults')
        if variation_explanations:
            part_of_speech_list.extend(
                variation_explanations.find_all(class_='compTitle'))
            meaning_list.extend(
                variation_explanations.find_all(class_='compArticleList'))

        additional_explanations = data.find(
            class_='dd algo othersNew lst DictionaryResults')
        if additional_explanations:
            part_of_speech_list.extend(
                additional_explanations.find_all(class_='compTitle mt-26'))
            meaning_list.extend(
                additional_explanations.find_all(class_='compArticleList'))

        more_explanations = itertools.zip_longest(part_of_speech_list,
                                                  meaning_list)

        for part_of_speech, meaning in more_explanations:
            node = [part_of_speech.text] if part_of_speech else ['']

            if meaning:
                for item in meaning.find_all('li', class_='ov-a'):
                    pack = [item.find('h4').text]

                    for example in (
                            tag for tag in item.find_all('span')
                            if 'line-height: 17px;' not in tag['style']):
                        sentence = ''

                        for w in example.contents:
                            if w.name == 'b':
                                sentence += '*' + w.text + '*'
                            else:
                                try:
                                    sentence += w
                                except Exception:
                                    pass

                        pack.append(sentence.strip())
                    node.append(pack)
            content['verbose'].append(node)

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
        return record
Example #18
    def query(self, word: str):
        webpage = self._get_raw(word)
        soup = BeautifulSoup(webpage, "html.parser")
        content = {}

        en_css = "#dictionary-neodict-en"
        es_css = "#dictionary-neodict-es"
        card = soup.select_one(en_css) or soup.select_one(es_css)
        if card is None:
            raise NotFoundError(word)

        word_css = "div > div:nth-child(1) > span"
        word_element = card.select_one(word_css)
        if word_element is None:
            raise NotFoundError(word)
        content['word'] = word_element.text
        '''
        COPULAR VERB  # speech
            # categories_card
            1. (used to express a permanent quality)  # category_text
                # explanation
                a. ser  # index
                # examples
                    # example
                    The ocean is blue.
                    El océano es azul.
            2. (used to express a temporary state)
                a. estar
                    I'm not in a good mood today.
                    Hoy no estoy de buen humor.

                    The sky is cloudy.
                    El cielo está nublado.
        ... (another part of speech, if there is one)
        '''
        speech_pattern = "div > div:nth-child(2)"
        # "#dictionary-neodict-en > div > div:nth-child(2)"

        # Start to grab
        content['explains'] = []
        speech = card.select_one(speech_pattern)
        while speech:
            result = []
            speech_text, categories_card = speech.children
            speech_text_element = speech_text.find(['a', 'span'])
            content['explains'].append([speech_text_element.text, result])

            for category in categories_card.children:
                category_text_element, explanations_card = category.children
                category_text = category_text_element.text

                explains = []
                for explanation in explanations_card.children:
                    for item in explanation.children:
                        index_elements, examples = (item.contents[:-1],
                                                    item.contents[-1])
                        index = ' '.join([
                            el.text.strip() for el in index_elements
                            if el != ' '
                        ])

                        if (not examples) and index:
                            explains.append((index, ))
                            continue

                        sentences = []
                        for example in examples:
                            t = example.find_all()
                            # There should be exactly 3 elements:
                            # [text, —, text]
                            '''
                            When Spanish => English, Spanish is shown first.
                            When English => Spanish, English is shown first.
                            So the order within the pair below is not fixed.
                            '''
                            sentences.append((t[0].text, t[2].text))
                        explains.append((index, sentences))

                result.append([category_text, explains])
            speech = speech.next_sibling

        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

        return record
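
The docstring above ('COPULAR VERB' and so on) describes the page layout, and the stored structure mirrors it. A sketch using the docstring's own sample sentences; the nesting follows the append() calls above and the values are otherwise illustrative.

import json

# Illustrative content for Example #18; the sample sentences come from
# the docstring above, the nesting follows the append() calls.
content = {
    'word': 'be',
    'explains': [
        ['COPULAR VERB', [
            ['1. (used to express a permanent quality)', [
                ('a. ser',
                 [('The ocean is blue.', 'El océano es azul.')]),
            ]],
            ['2. (used to express a temporary state)', [
                ('a. estar',
                 [("I'm not in a good mood today.",
                   'Hoy no estoy de buen humor.')]),
            ]],
        ]],
    ],
}
print(json.dumps(content, ensure_ascii=False))
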
Example #19
    def query(self, word: str):
        webpage = self._get_raw(word)
        soup = BeautifulSoup(webpage, "html.parser")
        response = json.loads(soup.text)

        # Not Found
        if not response.get("列表"):
            raise NotFoundError(word)

        # Show the Chinese word from iTaigi instead of user input if possible
        with suppress(KeyError, IndexError):
            word = response["列表"][0]["外語資料"]

        content = {}

        # Fetch basic words with text, pronounce and sentence
        try:
            basic_words = response["列表"][0]["新詞文本"]
        except Exception:
            raise
        else:
            content['basic_words'] = []
            for basic_word in basic_words:
                d = {}

                text = self._get_word_text(basic_word)
                d['text'] = text

                pronounce = self._get_word_pronounce(basic_word)
                d['pronounce'] = pronounce

                if self.args.verbose:
                    sentences = self._get_word_sentences(text, pronounce)
                    d['sentences'] = sentences

                content['basic_words'].append(d)

        # Fetch related words
        try:
            related_words = response["其他建議"]
        except Exception:
            raise
        else:
            content['related_words'] = []
            for related_word in related_words:
                d = {}

                text = self._get_word_text(related_word)
                d['text'] = text

                pronounce = self._get_word_pronounce(related_word)
                d['pronounce'] = pronounce

                if self.args.verbose:
                    sentences = self._get_word_sentences(text, pronounce)
                    d['sentences'] = sentences

                content['related_words'].append(d)

        # Save content with word and provider.
        record = Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )

        return record