Пример #1
0
    def test_show(self):
        """Smoke-test ``show`` with a hand-written record for the word 贗."""
        payload = '''
        {
            "heteronyms": [{
                "bopomofo": "ㄧㄢˋ",
                "bopomofo2": "yàn",
                "definitions": [{
                    "def": "假的、偽造的。",
                    "example": ["如:「贗品」。"],
                    "quote": ["..."],
                    "type": "形",
                    "synonyms": "尛",
                    "antonyms": "萌"
                }],
                "pinyin": "yàn"
            }],
            "non_radical_stroke_count": 15,
            "radical": "貝",
            "stroke_count": 22,
            "title": "贗"
        }
        '''
        record = Record(
            word='贗',
            content=payload,
            source=self.dict.provider,
        )

        # Nothing is asserted on the output: the test passes as long as
        # ``show`` completes without raising.
        self.dict.show(record)
Пример #2
0
    def query(self, word: str):
        """Return a Record holding the first English definition of ``word``.

        The raw response is parsed as JSON, the first definition under the
        ``en`` key is extracted, its HTML tags are stripped, and the result
        is stored as ``{"definition": <text>}``.

        Raises:
            NotFoundError: if the raw query fails with ``QueryError``, or the
                response has no usable first ``en`` definition.
        """
        try:
            raw = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        payload = json.loads(raw)

        try:
            # Get the first definition string from JSON.
            definition = payload['en'][0]['definitions'][0]['definition']
        except (KeyError, IndexError, TypeError):
            # The API can return JSON without an 'en' entry; an empty list
            # or an unexpected shape anywhere along the lookup chain is
            # treated the same way instead of crashing.
            raise NotFoundError(word)

        # Clean the definition string from HTML tags.
        text = BeautifulSoup(definition, "html.parser").text

        return Record(
            word=word,
            content=json.dumps({'definition': text}),
            source=self.provider,
        )
Пример #3
0
    def query(self, word: str):
        """Template ``query``: fetch the page for ``word`` and return a Record.

        As written, the page is parsed but nothing is extracted from it, so
        the stored content is always an empty JSON object.
        """
        soup = BeautifulSoup(self._get_raw(word), "html.parser")
        content = {}

        # Parse `soup` and fill the information you need into `content`.
        #
        # When the word the user asked for is not found on this dictionary,
        # signal it with:
        # ```
        # except AttributeError:
        #    raise NotFoundError(word)
        # ```

        if self.args.verbose:
            # Placeholder for the extra work done in verbose mode.
            pass

        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
Пример #4
0
    def test_show(self):
        """Smoke-test ``show`` with a minimal part-of-speech record."""
        payload = '''
        [{"part_of_speech":"part_of_speech",
        "definitions":[{"definition": "definition","examples":["example"]}]}]
        '''

        record = Record(
            word="string",
            content=payload,
            source=self.dict.provider,
        )
        # Passing means ``show`` did not raise; no output is checked.
        self.dict.show(record)
Пример #5
0
    def query(self, word: str):
        """Fetch the page for ``word`` and return it as a version-2 Record.

        Raises:
            NotFoundError: if ``parse_summary`` raises ``AttributeError`` or
                ``parse_explain`` raises ``IndexError``.
        """
        soup = bs4.BeautifulSoup(self._get_raw(word), "html.parser")

        # Bump the version whenever the stored format changes again: the
        # `show` function acts according to the version number.
        #
        # History of the format:
        #
        # Original layout, from before version numbers existed:
        # content = {
        #     'word': ...,
        #     'pronounce': ...,
        #     'sound': (optional),
        #     'explain': [...],
        #     'verbose': [...],
        # }
        #
        # Version 2 (yahoo dictionary content was provided by Dr.eye at
        # that moment):
        # content = {
        #     'version': 2,
        #     'summary': {
        #         'word': ...,
        #         'pronounce': [('KK', '...'), (...)],  // optional.
        #                                               // e.g. 'google'
        #         'explain': [(optional)],  # 'hospitalized' is summary-only
        #         'grammar': [(optional)],
        #     },
        #     'explain': [...],
        #     'verbose': [(optional)],
        # }
        content = {'version': 2}

        # Summary is required.
        try:
            content['summary'] = self.parse_summary(soup, word)
        except AttributeError:
            raise NotFoundError(word)

        # Explain is required.
        try:
            content['explain'] = self.parse_explain(soup)
        except IndexError:
            raise NotFoundError(word)

        # Verbose is optional.
        content['verbose'] = self.parse_verbose(soup)

        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
Пример #6
0
    def query(self, word: str):
        """Return the raw response for ``word`` wrapped in a Record.

        Raises:
            NotFoundError: if the ``data`` entry of the JSON response is
                empty.
        """
        raw = self._get_raw(word)

        # The response is only parsed to check for results; the Record
        # stores the untouched raw text.
        if not json.loads(raw)['data']:
            raise NotFoundError(word)

        return Record(word=word, content=raw, source=self.provider)
Пример #7
0
    def test_show(self):
        """Smoke-test ``show`` with a canned ru-en translation response."""
        payload = '''
        {
            "code": 200,
            "lang": "ru-en",
            "text": ["house"]
        }
        '''
        record = Record(
            word='дом',
            content=payload,
            source=self.dict.provider,
        )

        # Nothing is asserted on the output: the test passes as long as
        # ``show`` completes without raising.
        self.dict.show(record)
Пример #8
0
    def query(self, word: str):
        """Return every English definition of ``word``, grouped by part of speech.

        The stored content is a JSON list of
        ``{"part_of_speech": ..., "definitions": [...]}`` objects, where each
        definition is ``{"definition": <text>}`` plus an ``"examples"`` list
        when the API supplied one. HTML tags are stripped from all texts.

        Raises:
            NotFoundError: if the raw query fails with ``QueryError`` or the
                response has no ``en`` key.
        """
        try:
            raw = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        payload = json.loads(raw)

        try:
            english = payload['en']
        except KeyError:
            # API can return JSON that does not contain 'en' language.
            raise NotFoundError(word)

        def strip_html(markup):
            return BeautifulSoup(markup, "html.parser").text

        sections = []
        for entry in english:
            # One section per part of speech.
            section = {
                'part_of_speech': entry['partOfSpeech'],
                'definitions': [],
            }
            sections.append(section)

            for item in entry['definitions']:
                parsed = {'definition': strip_html(item['definition'])}
                section['definitions'].append(parsed)

                # Examples are optional in the API response.
                try:
                    raw_examples = item['examples']
                except KeyError:
                    continue
                parsed['examples'] = [strip_html(ex) for ex in raw_examples]

        return Record(
            word=word,
            content=json.dumps(sections),
            source=self.provider,
        )
Пример #9
0
    def query(self, word: str):
        """Return the raw response for ``word`` wrapped in a Record.

        Raises:
            NotFoundError: if the raw response contains ``"no_results"``.
        """
        raw = self._get_raw(word)

        if "no_results" not in raw:
            return Record(word=word, content=raw, source=self.provider)

        raise NotFoundError(word)
Пример #10
0
    def query(self, word: str):
        """Return the raw response for ``word`` wrapped in a Record.

        Raises:
            NotFoundError: if the raw query fails with ``QueryError``.
        """
        try:
            raw = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        return Record(word=word, content=raw, source=self.provider)
Пример #11
0
    def query(self, word: str):
        """Return the raw response for ``word`` wrapped in a Record.

        Raises:
            NotFoundError: if the ``list`` entry of the JSON response is an
                empty list.
        """
        raw = self._get_raw(word)

        # The response is only parsed to detect "no results"; the Record
        # keeps the untouched raw text.
        results = json.loads(raw)['list']
        if results == []:
            raise NotFoundError(word)

        return Record(word=word, content=raw, source=self.provider)
Пример #12
0
    def test_show(self):
        """Smoke-test ``show`` with a single mocked list entry."""
        payload = '''
        {
            "list": [
                {
                    "word": "mock",
                    "definition": "Mock",
                    "example": "..."
                }
            ]
        }
        '''
        record = Record(word='mock', content=payload, source='urban')

        # Nothing is asserted on the output: the test passes as long as
        # ``show`` completes without raising.
        self.dict.show(record)
Пример #13
0
def get_pyjoke(pyjokes, word: str):
    """Pick a random joke mentioning ``word`` and wrap it in a Record.

    A joke matches when one of its whitespace-separated tokens, with all
    non-alphanumeric characters removed, equals ``word`` exactly.

    Returns ``None`` when ``pyjokes`` is falsy or no joke matches.
    """
    if not pyjokes:
        return None

    def mentions_word(joke):
        # Very basic string searching: strip punctuation from each token
        # and look for an exact match.
        cleaned = (
            ''.join(filter(str.isalnum, token)) for token in joke.split()
        )
        return word in cleaned

    try:
        matches = [
            joke for joke in pyjokes.get_jokes() if mentions_word(joke)
        ]
        # random.choice raises IndexError on an empty list.
        chosen = random.choice(matches)
    except IndexError:
        return None

    return Record(word=word, content=chosen, source='pyjokes')
Пример #14
0
    def test_show(self):
        """Smoke-test ``show`` with a hand-written record for the word 木耳."""
        payload = '''
        {
            "h": [{
                "T": "bo̍k-ní",
                "_": "928",
                "d": [{
                    "f": "蕈`菇~`類~。`生長~`在~`朽~`腐~`的~`樹~`幹~`上~ ...",
                    "type": "`名~"
                }]
            }],
            "t": "`木~`耳~"
        }
        '''
        record = Record(
            word='木耳',
            content=payload,
            source=self.dict.provider,
        )

        # Nothing is asserted on the output: the test passes as long as
        # ``show`` completes without raising.
        self.dict.show(record)
Пример #15
0
    def query(self, word: str):
        """Query with the configured app credentials and return a Record.

        On ``QueryError`` a red ``Oxford: ...`` message, looked up from
        ``self.status_code`` by the error's status code, is printed before
        the failure is reported.

        Raises:
            NotFoundError: if the raw query fails with ``QueryError``.
        """
        try:
            app_id, app_key = self._get_app_key()
            credentials = {'app_id': app_id, 'app_key': app_key}
            raw = self._get_raw(word, headers=credentials)
        except QueryError as exception:
            reason = self.status_code.get(
                exception.status_code, 'Some bad thing happened')
            self.color.print('Oxford: ' + reason, 'red')
            raise NotFoundError(exception.word)

        return Record(word=word, content=raw, source=self.provider)
Пример #16
0
    def query(self, word: str):
        """Scrape the result table for ``word`` and group rows by source.

        The stored content is ``{"title": word, "sources": {...}}`` where
        each source maps to a list of ``(en, zhtw)`` pairs.

        Raises:
            NotFoundError: if no ``tr.dash`` row is found on the page.
        """
        # NOTE(review): certificate verification is switched off for this
        # request and the matching urllib3 warnings are silenced.
        requests.packages.urllib3.disable_warnings()
        page = self._get_raw(word, verify=False)

        sources = defaultdict(list)
        data = {"title": word, "sources": sources}

        def cell(row, css_class):
            return row.find("td", attrs={"class": css_class})

        rows = BeautifulSoup(page, "html.parser").find_all(
            "tr", {"class": "dash"})
        for row in rows:
            source = cell(row, "sourceW").find("a").text.strip()
            en = cell(row, "ennameW").text.strip()
            zhtw = cell(row, "zhtwnameW").text.strip()
            sources[source].append((en, zhtw))

        if not sources:
            raise NotFoundError(word)

        return Record(
            word=word,
            content=json.dumps(data),
            source=self.provider,
        )
Пример #17
0
    def query(self, word: str):
        """Return the raw response for ``word`` when its ``code`` is 200.

        For any other code a ``Yandex: ...`` message, looked up from
        ``self.status_code``, is printed before the failure is reported.

        Raises:
            NotFoundError: if the raw query fails with ``QueryError`` or the
                response code is not 200.
        """
        try:
            raw = self._get_raw(word)
        except QueryError as exception:
            raise NotFoundError(exception.word)

        status = json.loads(raw).get('code')
        if status == 200:
            return Record(word=word, content=raw, source=self.provider)

        # Response codes are documented at
        # https://tech.yandex.com/translate/doc/dg/reference/translate-docpage/#codes
        message = self.status_code.get(
            status, 'Some bad thing happened with Yandex')
        print('Yandex: ' + message)
        raise NotFoundError(word)
Пример #18
0
    def query(self, word: str):
        """Scrape the result page for ``word`` and return it as a Record.

        The stored JSON content holds:

        * ``word``: text of ``span#term``;
        * ``pronounce`` (optional): list of ``(label, "[...]")`` pairs;
        * ``sound`` (optional): ``{sound_type: {accent: [url, ...]}}``;
        * ``explain`` / ``verbose``: lists of nodes shaped
          ``[part_of_speech, [meaning, example, ...], ...]``.

        Changes from the previous implementation: the regex is a raw
        string, ``verbose`` is computed once and always present, a missing
        pronunciation span or padded ``None`` meaning no longer crashes, and
        spans without a ``style`` attribute are accepted in both sections.

        Raises:
            NotFoundError: if the page has no ``span#term`` element.
        """

        def render_sentence(example):
            # Bold children are wrapped in '*'; plain text is kept as is and
            # any other tag is skipped.
            parts = []
            for piece in example.contents:
                if piece.name == 'b':
                    parts.append('*' + piece.text + '*')
                elif isinstance(piece, str):
                    parts.append(piece)
            return ''.join(parts).strip()

        def collect_meanings(meaning):
            # One pack per <li class="ov-a">: the <h4> text followed by the
            # example sentences found in its <span> children.
            packs = []
            for item in meaning.find_all('li', class_='ov-a'):
                pack = [item.find('h4').text]
                for span in item.find_all('span'):
                    if 'line-height: 17px;' in span.get('style', ''):
                        continue
                    pack.append(render_sentence(span))
                packs.append(pack)
            return packs

        def build_nodes(pairs):
            # zip_longest pads the shorter side with None, so both halves
            # of a pair are optional.
            nodes = []
            for part_of_speech, meaning in pairs:
                node = [part_of_speech.text] if part_of_speech else ['']
                if meaning:
                    node.extend(collect_meanings(meaning))
                nodes.append(node)
            return nodes

        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")
        content = {}

        # handle record.word
        try:
            content['word'] = data.find('span', id='term').text
        except AttributeError:
            raise NotFoundError(word)

        # handle pronounce
        pronunciation = data.find('span', id='pronunciation_pos')
        pronu_value = pronunciation.text if pronunciation is not None else ''
        if pronu_value:
            content['pronounce'] = [
                match.group(1, 2)
                for match in re.finditer(r'(\w+)(\[.*?\])', pronu_value)
            ]

        # handle sound
        proun_sound = data.find(
            'span',
            style="display: none;",
            id="iconStyle",
            class_="tri",
            title="http://product.dreye.com.tw/",
        )
        if proun_sound:
            content['sound'] = {}
            d = json.loads(proun_sound.text)

            sound_types_and_urls = (d.get('sound_url_1', []) +
                                    d.get('sound_url_2', []))
            sound_accents = (d.get('sound_type_1', []) +
                             d.get('sound_type_2', []))

            for sound_type_and_url, sound_accent in zip(
                    sound_types_and_urls, sound_accents):
                if sound_type_and_url:
                    sound_type, sound_url = list(sound_type_and_url.items())[0]
                    content['sound'].setdefault(sound_type, {}).setdefault(
                        sound_accent, []).append(sound_url)

        # Handle explain
        main_explanations = data.find(
            class_='dd algo explain mt-20 lst DictionaryResults')
        if main_explanations:
            main_pairs = itertools.zip_longest(
                main_explanations.find_all(class_='compTitle mb-10'),
                main_explanations.find_all(
                    class_='compArticleList mb-15 ml-10'))
        else:
            main_pairs = ()
        content['explain'] = build_nodes(main_pairs)

        # verbose info (independent of the explain section, so built once)
        part_of_speech_list, meaning_list = [], []

        variation_explanations = data.find(
            class_='dd algo variation fst DictionaryResults')
        if variation_explanations:
            part_of_speech_list.extend(
                variation_explanations.find_all(class_='compTitle'))
            meaning_list.extend(
                variation_explanations.find_all(class_='compArticleList'))

        additional_explanations = data.find(
            class_='dd algo othersNew lst DictionaryResults')
        if additional_explanations:
            part_of_speech_list.extend(
                additional_explanations.find_all(class_='compTitle mt-26'))
            meaning_list.extend(
                additional_explanations.find_all(class_='compArticleList'))

        content['verbose'] = build_nodes(
            itertools.zip_longest(part_of_speech_list, meaning_list))

        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
Пример #19
0
 def test_show(self):
     """Smoke-test ``show`` with the module's ``SAMPLE_RESPONSE``."""
     record = Record(
         word='string',
         content=SAMPLE_RESPONSE,
         source=self.dict.provider,
     )
     # Passing means ``show`` did not raise; no output is checked.
     self.dict.show(record)
Пример #20
0
    def query(self, word: str):
        """Query iTaigi for ``word`` and return basic and related words.

        The stored content is ``{"basic_words": [...], "related_words":
        [...]}``; every entry has ``text`` and ``pronounce``, plus
        ``sentences`` in verbose mode. ``basic_words`` is sorted by text.

        Raises:
            NotFoundError: if the response has no (or an empty) ``列表``.
        """
        page_text = BeautifulSoup(self._get_raw(word), "html.parser").text
        response = json.loads(page_text)

        # Not Found
        if not response.get("列表"):
            raise NotFoundError(word)

        # Show the Chinese word from iTaigi instead of the user input when
        # the response provides one.
        with suppress(KeyError, IndexError):
            word = response["列表"][0]["外語資料"]

        def describe(entry):
            # Text and pronunciation always; sentences only when verbose.
            text = self._get_word_text(entry)
            pronounce = self._get_word_pronounce(entry)
            info = {'text': text, 'pronounce': pronounce}
            if self.args.verbose:
                info['sentences'] = self._get_word_sentences(text, pronounce)
            return info

        # Basic words with text, pronounce and sentence.
        basic_words = [
            describe(entry) for entry in response["列表"][0]["新詞文本"]
        ]

        # Fix issue-452 for iTaigi testing: iTaigi returns basic_words in
        # random order, and they are stored as a list, so they are sorted
        # before being saved to keep the unit tests stable.
        basic_words.sort(key=lambda item: item['text'])

        # Related words, kept in response order.
        related_words = [describe(entry) for entry in response["其他建議"]]

        content = {
            'basic_words': basic_words,
            'related_words': related_words,
        }

        # Save content with word and provider.
        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
Пример #21
0
    def query(self, word: str):
        """Scrape the result page for ``word`` and return it as a Record.

        The stored JSON content holds:

        * ``word``: text of ``span#term``;
        * ``pronounce`` (optional): list of ``(label, "[...]")`` pairs;
        * ``sound`` (optional): ``[('mp3', url), ('ogg', url)]``;
        * ``explain`` / ``verbose``: lists of nodes shaped
          ``[part_of_speech, [meaning, example, ...], ...]``.

        Changes from the previous implementation: the regex is a raw
        string, ``verbose`` is computed once and always present, a missing
        pronunciation span or padded ``None`` meaning no longer crashes,
        spans without a ``style`` attribute are accepted, and the bare
        ``except`` is gone.

        Raises:
            NotFoundError: if the page has no ``span#term`` element.
        """

        def render_sentence(example):
            # Bold children are wrapped in '*'; plain text is kept as is and
            # any other tag is skipped.
            parts = []
            for piece in example.contents:
                if piece.name == 'b':
                    parts.append('*' + piece.text + '*')
                elif isinstance(piece, str):
                    parts.append(piece)
            return ''.join(parts).strip()

        def collect_meanings(meaning):
            # One pack per <li class="ov-a">: the <h4> text followed by the
            # example sentences found in its <span> children.
            packs = []
            for item in meaning.find_all('li', class_='ov-a'):
                pack = [item.find('h4').text]
                for span in item.find_all('span'):
                    if 'line-height: 17px;' in span.get('style', ''):
                        continue
                    pack.append(render_sentence(span))
                packs.append(pack)
            return packs

        def build_nodes(pairs):
            # zip_longest pads the shorter side with None, so both halves
            # of a pair are optional.
            nodes = []
            for part_of_speech, meaning in pairs:
                node = [part_of_speech.text] if part_of_speech else ['']
                if meaning:
                    node.extend(collect_meanings(meaning))
                nodes.append(node)
            return nodes

        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")
        content = {}

        # handle record.word
        try:
            content['word'] = data.find('span', id='term').text
        except AttributeError:
            raise NotFoundError(word)

        # handle pronounce
        pronunciation = data.find('span', id='pronunciation_pos')
        pronu_value = pronunciation.text if pronunciation is not None else ''
        if pronu_value:
            content['pronounce'] = [
                match.group(1, 2)
                for match in re.finditer(r'(\w+)(\[.*?\])', pronu_value)
            ]

        # handle sound
        pronu_sound = data.find(class_='proun_sound')
        if pronu_sound:
            content['sound'] = [
                ('mp3',
                 pronu_sound.find(class_='source',
                                  attrs={
                                      'data-type': 'audio/mpeg'
                                  }).attrs['data-src']),
                ('ogg',
                 pronu_sound.find(class_='source',
                                  attrs={
                                      'data-type': 'audio/ogg'
                                  }).attrs['data-src']),
            ]

        # Handle explain
        main_explanations = data.find(
            class_='dd algo explain mt-20 lst DictionaryResults')
        if main_explanations:
            main_pairs = itertools.zip_longest(
                main_explanations.find_all(class_='compTitle mb-10'),
                main_explanations.find_all(
                    class_='compArticleList mb-15 ml-10'))
        else:
            main_pairs = ()
        content['explain'] = build_nodes(main_pairs)

        # verbose info (independent of the explain section, so built once)
        part_of_speech_list, meaning_list = [], []

        variation_explanations = data.find(
            class_='dd algo variation fst DictionaryResults')
        if variation_explanations:
            part_of_speech_list.extend(
                variation_explanations.find_all(class_='compTitle'))
            meaning_list.extend(
                variation_explanations.find_all(class_='compArticleList'))

        additional_explanations = data.find(
            class_='dd algo othersNew lst DictionaryResults')
        if additional_explanations:
            part_of_speech_list.extend(
                additional_explanations.find_all(class_='compTitle mt-26'))
            meaning_list.extend(
                additional_explanations.find_all(class_='compArticleList'))

        content['verbose'] = build_nodes(
            itertools.zip_longest(part_of_speech_list, meaning_list))

        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
Пример #22
0
    def test_show(self):
        """Smoke-test ``show`` with a single-definition record."""
        payload = '{"definition": "string"}'

        record = Record(
            word="string",
            content=payload,
            source=self.dict.provider,
        )
        # Passing means ``show`` did not raise; no output is checked.
        self.dict.show(record)
Пример #23
0
    def query(self, word: str):
        """Scrape exact and fuzzy matches for ``word`` and return a Record.

        A POST to ``self.POST_API`` is made first and its cookies are passed
        to the raw query. The stored content has ``title``,
        ``exact_sources`` (one ``{title, defs, link}`` dict per source) and
        ``fuzzy_sources`` (a list of such dicts per source).

        Raises:
            NotFoundError: if neither ``#accordion_cross`` nor
                ``#accordion`` exists on the page.
        """
        search = requests.post(
            self.POST_API,
            json={"c": "1", "t": "all", "q": word},
        )
        page = self._get_raw(word, cookies=search.cookies)

        data = {
            "title": word,
            "exact_sources": defaultdict(list),
            "fuzzy_sources": defaultdict(list),
        }
        soup = BeautifulSoup(page, "html.parser")

        def last_span_text(strong):
            return strong.find_all("span")[-1].text.strip()

        def read_panel(panel):
            # Fields shared by exact and fuzzy panels: title, definitions
            # (headline first, then one per following <li>), and source.
            title = panel.find("div", {"class": "title"}).find("a").text
            defs = [last_span_text(panel.find("strong", {"class": "word"}))]
            source = panel.find("strong", {"class": "race"}).text

            for item in panel.find_all("li")[1:]:
                extra = item.find("strong", {"class": "word"})
                if extra:
                    defs.append(last_span_text(extra))

            return title, defs, source

        # Exact matching: the link comes from the "btn-more" anchor and
        # each source holds a single entry.
        exact = soup.find(id="accordion_cross")
        if exact:
            for panel in exact.find_all("div", {"class": "panel"}):
                title, defs, source = read_panel(panel)
                more = panel.find("a", {"class": "btn-more"})
                data["exact_sources"][source] = {
                    "title": title,
                    "defs": defs,
                    "link": self.BASE_URL + more["href"],
                }

        # Fuzzy matching: the link comes from the title anchor and entries
        # accumulate per source.
        fuzzy = soup.find(id="accordion")
        if fuzzy:
            for panel in fuzzy.find_all("div", {"class": "panel"}):
                title, defs, source = read_panel(panel)
                anchor = panel.find("div", {"class": "title"}).find("a")
                data["fuzzy_sources"][source].append({
                    "title": title,
                    "defs": defs,
                    "link": self.BASE_URL + anchor["href"],
                })

        if not (exact or fuzzy):
            raise NotFoundError(word)

        return Record(
            word=word,
            content=json.dumps(data),
            source=self.provider,
        )
Пример #24
0
    def query(self, word: str):
        """Query iTaigi for ``word`` and return basic and related words.

        The stored content is ``{"basic_words": [...], "related_words":
        [...]}``; every entry has ``text`` and ``pronounce``, plus
        ``sentences`` in verbose mode. Both lists keep response order.

        Raises:
            NotFoundError: if the response has no (or an empty) ``列表``.
        """
        page_text = BeautifulSoup(self._get_raw(word), "html.parser").text
        response = json.loads(page_text)

        # Not Found
        if not response.get("列表"):
            raise NotFoundError(word)

        # Show the Chinese word from iTaigi instead of the user input when
        # the response provides one.
        with suppress(KeyError, IndexError):
            word = response["列表"][0]["外語資料"]

        def describe(entry):
            # Text and pronunciation always; sentences only when verbose.
            text = self._get_word_text(entry)
            pronounce = self._get_word_pronounce(entry)
            info = {'text': text, 'pronounce': pronounce}
            if self.args.verbose:
                info['sentences'] = self._get_word_sentences(text, pronounce)
            return info

        # Basic words with text, pronounce and sentence.
        basic_words = [
            describe(entry) for entry in response["列表"][0]["新詞文本"]
        ]

        # Related words.
        related_words = [describe(entry) for entry in response["其他建議"]]

        content = {
            'basic_words': basic_words,
            'related_words': related_words,
        }

        # Save content with word and provider.
        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
Пример #25
0
    def query(self, word: str):
        """Scrape the first dictionary entry for ``word`` and return a Record.

        The stored content has ``word`` (the headword text) and
        ``explains``: a list of ``[speech, [[context, explains]]]`` items
        whose innermost entries are ``(index,)`` or
        ``(index, spanish, english)`` tuples.

        Raises:
            NotFoundError: if the page has no ``div.card``, no
                ``dictionary-entry`` inside it, or no headword element.
        """
        webpage = self._get_raw(word)
        data = BeautifulSoup(webpage, "html.parser")
        content = {}

        card = data.find('div', attrs={'class': 'card'})
        if card is None:
            # Without this guard a page lacking the card crashed with
            # AttributeError instead of reporting "not found".
            raise NotFoundError(word)

        entry = card.find(
            # just get the first one
            attrs={'class': 'dictionary-entry'})

        if not entry:
            raise NotFoundError(word)

        content['explains'] = []

        # The word can exist in both English & Spanish.
        word_element = (card.find(attrs={'id': 'headword-en'})
                        or card.find(attrs={'id': 'headword-es'}))
        if word_element is None:
            raise NotFoundError(word)
        content['word'] = word_element.text

        # Two families of CSS classes are tried: "neodict" first, then
        # "neoharrap" as a fallback.
        pattern1 = {'class': 'dictionary-neodict-indent-1'}
        pattern2 = {'class': 'dictionary-neodict-indent-2'}
        pattern3 = {'class': 'dictionary-neodict-indent-3'}
        pattern_order = {'class': 'dictionary-neodict-translation'}
        pattern_example = {'class': 'dictionary-neodict-example'}
        pattern1_en = {'class': 'dictionary-neoharrap-indent-1'}
        pattern2_en = {'class': 'dictionary-neoharrap-indent-2'}
        pattern_order_en = {'class': 'dictionary-neoharrap-translation'}

        speeches = card.find_all(attrs={'class': 'part_of_speech'})
        categories = (entry.find_all(attrs=pattern1)
                      or entry.find_all(attrs=pattern1_en))

        for speech, category in zip(speeches, categories):
            result = []
            content['explains'].append([speech.text, result])
            context = category.find(attrs={'class': 'context'}).text
            explains = []

            for explain in (category.find_all(attrs=pattern2)
                            or category.find_all(attrs=pattern2_en)):

                orders = (explain.find_all(attrs=pattern_order)
                          or explain.find_all(attrs=pattern_order_en))
                if not orders:
                    continue

                # e.g.
                #
                #   ('a. forgiveness', 'b. pardon (law)')
                #
                indices = tuple(
                    order.text.replace('\xa0', ' ').strip()
                    for order in orders)

                examples = explain.find_all(attrs=pattern3)

                for example, index in zip(examples, indices):
                    # NOTE(review): relies on the example element's first
                    # and third children being the two sentences - confirm
                    # against the current page layout.
                    t = tuple(example.find(attrs=pattern_example))
                    spanish, english = t[0].text, t[2].text
                    explains.append((index, spanish, english))

                if not examples:
                    # No example sentences: keep the bare translations.
                    explains.extend((index, ) for index in indices)

            result.append([context, explains])

        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )
Пример #26
0
    def query(self, word: str):
        """Scrape the neodict card for ``word`` and return it as a Record.

        The stored content has ``word`` (the headword text) and
        ``explains``: a list of ``[speech, [[category, explains], ...]]``
        items whose innermost entries are ``(index,)`` or
        ``(index, [(sentence, sentence), ...])`` tuples.

        Raises:
            NotFoundError: if neither neodict card is on the page, or the
                card has no headword element.
        """
        soup = BeautifulSoup(self._get_raw(word), "html.parser")

        card = (soup.select_one("#dictionary-neodict-en")
                or soup.select_one("#dictionary-neodict-es"))
        if card is None:
            raise NotFoundError(word)

        headword = card.select_one("div > div:nth-child(1) > span")
        if headword is None:
            raise NotFoundError(word)

        all_speeches = []
        content = {'word': headword.text, 'explains': all_speeches}

        # Layout being walked:
        #
        # COPULAR VERB  # speech
        #     # categories_card
        #     1. (used to express a permanent quality)  # category_text
        #         # explanation
        #         a. ser  # index
        #         # examples
        #             # example
        #             The ocean is blue.
        #             El océano es azul.
        #     2. (used to express a temporary state)
        #         a. estar
        #             I'm not in a good mood today.
        #             Hoy no estoy de buen humor.
        #
        #             The sky is cloudy.
        #             El cielo está nublado.
        # ... (Another speech if it has.)

        # Full selector: "#dictionary-neodict-en > div > div:nth-child(2)"
        speech = card.select_one("div > div:nth-child(2)")

        # Start to grab: one iteration per speech, following siblings.
        while speech:
            speech_header, categories_card = speech.children
            speech_label = speech_header.find(['a', 'span'])
            categories = []
            all_speeches.append([speech_label.text, categories])

            for category in categories_card.children:
                category_header, explanations_card = category.children
                category_text = category_header.text

                senses = []
                for explanation in explanations_card.children:
                    for sense in explanation.children:
                        # Everything but the last child forms the index;
                        # the last child holds the examples.
                        index_elements = sense.contents[:-1]
                        examples = sense.contents[-1]
                        index = ' '.join(
                            element.text.strip()
                            for element in index_elements
                            if element != ' ')

                        if index and not examples:
                            senses.append((index, ))
                            continue

                        sentences = []
                        for example in examples:
                            # Should be only 3 elements: [text, —, text].
                            # Spanish => English shows Spanish first and
                            # English => Spanish shows English first, so
                            # the pair is not labelled by language.
                            parts = example.find_all()
                            sentences.append((parts[0].text, parts[2].text))
                        senses.append((index, sentences))

                categories.append([category_text, senses])

            speech = speech.next_sibling

        return Record(
            word=word,
            content=json.dumps(content),
            source=self.provider,
        )