Python flatten_multidimensional_list примеры, wordhoard.utilities.cleansing.flatten_multidimensional_list Python примеры использования

Пример #1

0

Показать файл

    def find_antonyms(self):
        """
        Purpose
        ----------
        This function queries multiple online repositories to discover antonyms
        associated with the specific word provided to the Class Antonyms.
        The antonyms are deduplicated and sorted alphabetically.

        Cached antonyms are used when the cache lookup reports a hit;
        otherwise thesaurus.com and wordhippo are queried.

        Returns
        ----------
        :returns:
            antonyms: the antonyms shaped by the configured output format --
            a list for 'list', a ``{word: [antonyms]}`` dict for 'dictionary',
            or a JSON string for 'json'. A colorized message string is
            returned when the word is invalid or no antonyms were found.
            None is returned for an unrecognized output format.

        :rtype: list, dict, str or None
        """
        def _format_output(words):
            # Shape the deduplicated, sorted antonyms per the output format.
            # Unrecognized formats yield None.
            if self._output_format not in ('list', 'dictionary', 'json'):
                return None
            unique_words = sorted(set(words))
            if self._output_format == 'list':
                return unique_words
            if self._output_format == 'dictionary':
                return {self._word: unique_words}
            return json.dumps({'antonyms': {self._word: unique_words}},
                              indent=4, ensure_ascii=False)

        if not self._validate_word():
            return _colorized_text(255, 0, 255, f'Please verify that the word {self._word} is spelled correctly.')

        # check_cache is indexed as (hit_flag, cached_values).
        check_cache = self._check_cache()
        if check_cache[0] is True:
            antonyms = cleansing.flatten_multidimensional_list(check_cache[1])
            return _format_output(antonyms)

        if check_cache[0] is False:
            antonyms_01 = self._query_thesaurus_com()
            antonyms_02 = self._query_wordhippo()
            # A source that found nothing returns None; drop it before flattening.
            antonyms = [x for x in (antonyms_01, antonyms_02) if x is not None]
            antonyms_results = cleansing.flatten_multidimensional_list(antonyms)
            if not antonyms_results:
                return _colorized_text(255, 0, 255,
                                       f'No antonyms were found for the word: {self._word} \n'
                                       f'Please verify that the word is spelled correctly.')
            return _format_output(antonyms_results)

        return None

Пример #2

0

Показать файл

def query_thesaurus_com(single_word):
    """
    This function queries thesaurus.com for antonyms
    related to the 'single_word' parameter.

    Cached antonyms are returned when the cache lookup yields a result;
    otherwise the site is queried and a successful result is cached.
    Request failures are logged rather than raised.

    :param single_word: string variable to search for
    :return: list of antonyms, or None when the word is invalid, has no
             antonyms, is unknown to the site, or the request failed
    """
    valid_word = word_verification.validate_word_syntax(single_word)
    if not valid_word:
        logger.error(f'The word {single_word} was not in a valid format.')
        logger.error(
            f'Please verify that the word {single_word} is spelled correctly.')
        return None

    check_cache = caching.cache_antonyms(single_word, 'thesaurus_com')
    if check_cache:
        return cleansing.flatten_multidimensional_list(
            list(check_cache.values()))

    try:
        req = requests.get(
            f'https://tuna.thesaurus.com/pageData/{single_word}',
            headers=basic_soup.http_headers,
            allow_redirects=True,
            verify=True,
            timeout=30)
        # The endpoint answers with a null "data" payload for unknown words.
        if '{"data":null}' in req.text:
            logger.error(
                f'The word {single_word} was not found on thesaurus.com.'
            )
            return None

        # Only the first definition's antonyms are used.
        dict_antonyms = req.json(
        )['data']['definitionData']['definitions'][0]['antonyms']
        if not dict_antonyms:
            logger.error(
                f'The word {single_word} has no antonyms on thesaurus.com.'
            )
            return None

        antonyms = sorted(r["term"] for r in dict_antonyms)
        caching.insert_word_cache_antonyms(
            single_word, 'thesaurus_com', antonyms)
        return antonyms
    except requests.HTTPError as e:
        logger.error('A HTTP error has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.ConnectionError as e:
        # Always report the failure; previously the message was a bare string
        # expression guarded by an unrelated `requests.codes` check.
        logger.error('Failed to establish a new connection.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.Timeout as e:
        logger.error('A connection timeout has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.RequestException as e:
        logger.error('An ambiguous exception occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    return None

Пример #3

0

Показать файл

    def find_definitions(self):
        """
        Purpose
        ----------
        Look up definitions for the word held by the Class Definitions,
        preferring cached results and otherwise querying Collins Dictionary,
        Merriam-Webster and synonym.com.

        Returns
        ----------
        :return: sorted, deduplicated list of definitions, or a message
                 string when the word is invalid or nothing was found
        :rtype: list or str
        """
        if not self._validate_word():
            return f'Please verify that the word {self._word} is spelled correctly.'

        # NOTE(review): this definitions lookup reads the cache through
        # caching.cache_antonyms -- confirm that this is the intended cache.
        cached = caching.cache_antonyms(self._word)
        if cached is not False:
            cached_definitions = cleansing.flatten_multidimensional_list(
                list(cached.values()))
            return sorted(set(cached_definitions))

        # Query every source in order; a source that found nothing returns None.
        source_results = (
            self._query_collins_dictionary(),
            self._query_merriam_webster(),
            self._query_synonym_com(),
        )
        found = [result for result in source_results if result is not None]
        flattened = cleansing.flatten_multidimensional_list(found)
        if flattened:
            return sorted(set(flattened))
        return f'No definitions were found for the word: {self._word}'

Пример #4

0

Показать файл

    def find_synonyms(self):
        """
        Purpose
        ----------
        Look up synonyms for the word held by the Class Synonyms, preferring
        cached results and otherwise querying Collins Dictionary,
        Merriam-Webster, synonym.com, thesaurus.com and WordNet.
        The synonyms are deduplicated and sorted alphabetically.

        Returns
        ----------
        :returns:
            synonyms: list of synonyms, or a message string when the word is
            invalid or nothing was found

        :rtype: list or str
        """
        if not self._validate_word():
            return f'Please verify that the word {self._word} is spelled correctly.'

        cached = self._check_cache()
        if cached is not False:
            # Cached values are returned as stored (no lowercasing here).
            cached_synonyms = cleansing.flatten_multidimensional_list(list(cached.values()))
            return sorted(set(cached_synonyms))

        # Query every source in order; a source that found nothing returns None.
        source_results = (
            self._query_collins_dictionary(),
            self._query_merriam_webster(),
            self._query_synonym_com(),
            self._query_thesaurus_com(),
            self._query_wordnet(),
        )
        found = [result for result in source_results if result is not None]
        flattened = cleansing.flatten_multidimensional_list(found)
        if flattened:
            # Lowercase before deduplicating so case variants collapse.
            return sorted({entry.lower() for entry in flattened})
        return f'No synonyms were found for the word: {self._word}'

Пример #5

0

Показать файл

Файл: dictionary.py Проект: lgs/wordhoard

def query_synonym_com(single_word):
    """
    Fetch a definition for 'single_word' from synonym.com.

    The definition is read from the page's ``og:description`` meta tag.
    A cached definition is returned when one exists; a freshly scraped one
    is written to the cache. Parsing errors are logged, not raised.

    :param single_word: string variable to search for
    :return: definition for the word, or None when the word is invalid,
             unknown to the site, or an error was logged
    """
    if not word_verification.validate_word_syntax(single_word):
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(f'Please verify that the word {single_word} is spelled correctly.')
        return None

    cached = caching.cache_definition(single_word, 'synonym_com')
    if cached:
        return cleansing.flatten_multidimensional_list(list(cached.values()))

    try:
        page_html = basic_soup.get_single_page_html(f'https://www.synonym.com/synonyms/{single_word}')
        parsed_page = BeautifulSoup(page_html, "lxml")
        meta_tag = parsed_page.find("meta", property="og:description")
        if 'find any words based on your search' in meta_tag['content']:
            logger.error(f'synonym.com had no reference for the word {single_word}')
            return None

        # The description is pipe-separated; the second field carries the
        # definition behind a "definition:" label.
        description_fields = regex.split(r'\|', meta_tag['content'])
        raw_parts = description_fields[1].lstrip().replace('definition:', '').split(',')
        cleaned_parts = [cleansing.normalize_space(part) for part in raw_parts]
        definition_text = ' '.join(str(part) for part in cleaned_parts)
        caching.insert_word_cache_definition(single_word, 'synonym_com', definition_text)
        return definition_text
    except (bs4.FeatureNotFound, AttributeError, KeyError, TypeError) as error:
        # Pick the headline for the first matching exception type, in the
        # same precedence order as separate except clauses would apply.
        headlines = (
            (bs4.FeatureNotFound, 'An error occurred in the following code segment:'),
            (AttributeError, 'An AttributeError occurred in the following code segment:'),
            (KeyError, 'A KeyError occurred in the following code segment:'),
            (TypeError, 'A TypeError occurred in the following code segment:'),
        )
        headline = next(text for kind, text in headlines if isinstance(error, kind))
        logger.error(headline)
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None

Пример #6

0

Показать файл

Файл: dictionary.py Проект: lgs/wordhoard

def query_collins_dictionary_synonym(single_word):
    """
    Fetch a definition for 'single_word' from the collinsdictionary.com
    English thesaurus page.

    A cached definition is returned when one exists; a freshly scraped one
    is written to the cache. Parsing errors are logged, not raised.

    :param single_word: string variable to search for
    :return: definition for the word, or None when the word is invalid,
             unknown to the site, or an error was logged
    """
    if not word_verification.validate_word_syntax(single_word):
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(f'Please verify that the word {single_word} is spelled correctly.')
        return None

    cached = caching.cache_definition(single_word, 'collins_dictionary')
    if cached:
        return cleansing.flatten_multidimensional_list(list(cached.values()))

    try:
        page_html = basic_soup.get_single_page_html(
            f'https://www.collinsdictionary.com/dictionary/english-thesaurus/{single_word}')
        anchor = basic_soup.query_html(page_html, 'div', 'class',
                                       'form type-def titleTypeSubContainer')
        if anchor is None:
            logger.error(f'Collins Dictionary had no reference for the word {single_word}')
            return None

        # The definition lives in the first 'def' div after the anchor element.
        definition_tag = anchor.findNext('div', {'class': 'def'})
        definition_text = definition_tag.text
        caching.insert_word_cache_definition(single_word, 'collins_dictionary', definition_text)
        return definition_text
    except (bs4.FeatureNotFound, AttributeError, KeyError, TypeError) as error:
        # Pick the headline for the first matching exception type, in the
        # same precedence order as separate except clauses would apply.
        headlines = (
            (bs4.FeatureNotFound, 'An error occurred in the following code segment:'),
            (AttributeError, 'An AttributeError occurred in the following code segment:'),
            (KeyError, 'A KeyError occurred in the following code segment:'),
            (TypeError, 'A TypeError occurred in the following code segment:'),
        )
        headline = next(text for kind, text in headlines if isinstance(error, kind))
        logger.error(headline)
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None

Пример #7

0

Показать файл

Файл: dictionary.py Проект: lgs/wordhoard

def query_thesaurus_com(single_word):
    """
    This function queries thesaurus.com for a definition
    related to the 'single_word' parameter.

    A cached definition is returned when the cache lookup yields a result;
    otherwise the site is queried and a successful result is cached.
    Request failures are logged rather than raised.

    :param single_word: string variable to search for
    :return: definition for the word, or None when the word is invalid,
             unknown to the site, or the request failed
    """
    valid_word = word_verification.validate_word_syntax(single_word)
    if not valid_word:
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(f'Please verify that the word {single_word} is spelled correctly.')
        return None

    check_cache = caching.cache_definition(single_word, 'thesaurus_com')
    if check_cache:
        return cleansing.flatten_multidimensional_list(list(check_cache.values()))

    try:
        req = requests.get(f'https://tuna.thesaurus.com/pageData/{single_word}',
                           headers=basic_soup.http_headers,
                           allow_redirects=True, verify=True, timeout=30)
        # Parse the body once instead of re-decoding it for every access.
        data = req.json()['data']
        if data is None:
            logger.error(f'thesaurus.com had no reference for the word {single_word}')
            return None

        # Only the first definition entry is used.
        definition = data['definitionData']['definitions'][0]['definition']
        caching.insert_word_cache_definition(single_word, 'thesaurus_com', definition)
        return definition
    except requests.HTTPError as e:
        logger.error('A HTTP error has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.ConnectionError as e:
        # Always report the failure; previously the message was a bare string
        # expression guarded by an unrelated `requests.codes` check.
        logger.error('Failed to establish a new connection.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.Timeout as e:
        logger.error('A connection timeout has occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    except requests.RequestException as e:
        logger.error('An ambiguous exception occurred.')
        logger.error(''.join(traceback.format_tb(e.__traceback__)))
    return None

Пример #8

0

Показать файл

    def find_hyponyms(self):
        """
        Purpose
        ----------
        This function queries classicthesaurus_com for hyponyms associated
        with the specific word provided to the Class Hyponyms.

        Cached hyponyms are returned when the cache lookup does not report
        False; otherwise the first results page and any additional pages are
        scraped, merged and written to the cache.

        Returns
        ----------
        :returns:
            hyponyms: sorted list of hyponyms, the cached values as flattened
            by cleansing.flatten_multidimensional_list, or a message string
            when the site reports no hyponyms. None is returned when the word
            is invalid, the page is a 404, or an error was logged.

        :rtype: list, str or None

        Errors
        ----------
        The following exceptions are caught and logged, not propagated:

            AttributeError: Raised when an attribute reference or assignment fails.

            IndexError: Raised when a sequence subscript is out of range

            KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys.

            TypeError: Raised when an operation or function is applied to an object of inappropriate type.

            bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested features
            is found
        """
        def _fetch(url):
            # Pass the proxies to Query only when they are configured, so the
            # call shape matches the no-proxy and proxy cases exactly.
            if self._proxies is None:
                return Query(url).get_single_page_html()
            return Query(url, self._proxies).get_single_page_html()

        if not self._validate_word():
            return None

        check_cache = self._check_cache()
        if check_cache is not False:
            return cleansing.flatten_multidimensional_list(
                list(check_cache.values()))

        base_url = f'https://www.classicthesaurus.com/{self._word}/narrower'
        try:
            response = _fetch(base_url)
            if response.status_code == 404:
                logger.info(
                    f'Classic Thesaurus had no hyponyms reference for the word {self._word}'
                )
                return None

            soup = BeautifulSoup(response.text, "lxml")
            hyponym = _get_hyponyms(soup)
            if 'no hyponyms found' in hyponym:
                return f'No hyponyms were found for the word: {self._word}'

            number_of_pages = _get_number_of_pages(soup)
            # NOTE(review): range(2, number_of_pages) never requests page
            # `number_of_pages` itself -- confirm against what
            # _get_number_of_pages actually counts.
            for page in range(2, number_of_pages):
                sub_html = _fetch(f'{base_url}/{page}')
                sub_soup = BeautifulSoup(sub_html.text, 'lxml')
                additional_hyponym = _get_hyponyms(sub_soup)
                # union() returns a new collection; keep it, otherwise the
                # results of the additional pages are silently dropped.
                hyponym = hyponym.union(additional_hyponym)

            self._update_cache(sorted(hyponym))
            return sorted(set(hyponym))
        except bs4.FeatureNotFound as error:
            logger.error(
                'An error occurred in the following code segment:')
            logger.error(''.join(
                traceback.format_tb(error.__traceback__)))
        except AttributeError as error:
            logger.error(
                'An AttributeError occurred in the following code segment:'
            )
            logger.error(''.join(
                traceback.format_tb(error.__traceback__)))
        except IndexError as error:
            logger.error(
                'An IndexError occurred in the following code segment:'
            )
            logger.error(''.join(
                traceback.format_tb(error.__traceback__)))
        except KeyError as error:
            logger.error(
                'A KeyError occurred in the following code segment:')
            logger.error(''.join(
                traceback.format_tb(error.__traceback__)))
        except TypeError as error:
            logger.error(
                'A TypeError occurred in the following code segment:')
            logger.error(''.join(
                traceback.format_tb(error.__traceback__)))
        return None

Пример #9

0

Показать файл

Файл: hypernyms.py Проект: johnbumgarner/wordhoard

    def find_hypernyms(self):
        """
        Purpose
        ----------
        This function queries classicthesaurus_com for hypernyms associated
        with the specific word provided to the Class Hypernyms.

        Cached hypernyms are used when the cache lookup reports a hit;
        otherwise the first results page and any additional pages are
        scraped, merged and written to the cache.

        Returns
        ----------
        :returns:
            hypernym: the deduplicated, sorted hypernyms shaped by the
            configured output format -- a list for 'list', a
            ``{word: [hypernyms]}`` dict for 'dictionary', or a JSON string
            for 'json'. A colorized message string is returned when the site
            reports no hypernyms. None is returned when the word is invalid,
            the page is a 404, the site is Cloudflare protected, the output
            format is unrecognized, or an error was logged.

        :rtype: list, dict, str or None

        Errors
        ----------
        The following exceptions are caught and logged, not propagated:

            AttributeError: Raised when an attribute reference or assignment fails

            IndexError: Raised when a sequence subscript is out of range

            KeyError: Raised when a mapping (dictionary) key is not found in the set of existing keys

            TypeError: Raised when an operation or function is applied to an object of inappropriate type

            bs4.FeatureNotFound: raised by the BeautifulSoup constructor if no parser with the requested features
            is found
        """
        def _format_output(words):
            # Shape the deduplicated, sorted hypernyms per the output format.
            # Unrecognized formats yield None.
            if self._output_format not in ('list', 'dictionary', 'json'):
                return None
            unique_words = sorted(set(words))
            if self._output_format == 'list':
                return unique_words
            if self._output_format == 'dictionary':
                return {self._word: unique_words}
            return json.dumps({'hypernyms': {self._word: unique_words}},
                              indent=4, ensure_ascii=False)

        def _fetch(url):
            # Forward only the options that were configured so that Query's
            # own defaults apply to the others.
            options = {}
            if self._user_agent is not None:
                options['user_agent'] = self._user_agent
            if self._proxies is not None:
                options['proxies'] = self._proxies
            return Query(url, **options).get_single_page_html()

        if not self._validate_word():
            return None

        # check_cache is indexed as (hit_flag, cached_values).
        check_cache = self._check_cache()
        if check_cache[0] is True:
            hypernym = cleansing.flatten_multidimensional_list(list(check_cache[1]))
            # Deduplicate for every format, matching the freshly scraped path.
            return _format_output(hypernym)
        if check_cache[0] is not False:
            return None

        base_url = f'https://www.classicthesaurus.com/{self._word}/broader'
        try:
            response = _fetch(base_url)
            if response.status_code == 404:
                logger.info(f'Classic Thesaurus had no hypernyms reference for the word {self._word}')
                return None

            soup = BeautifulSoup(response.text, "lxml")
            cloudflare_protection = CloudflareVerification('https://www.classicthesaurus.com',
                                                           soup).cloudflare_protected_url()
            if cloudflare_protection is True:
                logger.info('-' * 80)
                logger.info('The following URL has Cloudflare DDoS mitigation service protection.')
                logger.info('https://www.classicthesaurus.com')
                logger.info('-' * 80)
                return None
            if cloudflare_protection is not False:
                return None

            hypernym = _get_hypernyms(soup)
            if 'no hypernyms found' in hypernym:
                return _colorized_text(255, 0, 255,
                                       f'No hypernyms were found for the word: {self._word} \n'
                                       f'Please verify that the word is spelled correctly.')

            number_of_pages = _get_number_of_pages(soup)
            # NOTE(review): range(2, number_of_pages) never requests page
            # `number_of_pages` itself -- confirm against what
            # _get_number_of_pages actually counts.
            for page in range(2, number_of_pages):
                sub_html = _fetch(f'{base_url}/{page}')
                sub_soup = BeautifulSoup(sub_html.text, 'lxml')
                additional_hypernym = _get_hypernyms(sub_soup)
                if additional_hypernym:
                    # union() returns a new collection; keep it, otherwise the
                    # results of the additional pages are silently dropped.
                    hypernym = hypernym.union(additional_hypernym)

            self._update_cache(hypernym)
            return _format_output(hypernym)
        except bs4.FeatureNotFound as error:
            logger.error('An error occurred in the following code segment:')
            logger.error(''.join(traceback.format_tb(error.__traceback__)))
        except AttributeError as error:
            logger.error('An AttributeError occurred in the following code segment:')
            logger.error(''.join(traceback.format_tb(error.__traceback__)))
        except IndexError as error:
            logger.error('An IndexError occurred in the following code segment:')
            logger.error(''.join(traceback.format_tb(error.__traceback__)))
        except KeyError as error:
            logger.error('A KeyError occurred in the following code segment:')
            logger.error(''.join(traceback.format_tb(error.__traceback__)))
        except TypeError as error:
            logger.error('A TypeError occurred in the following code segment:')
            logger.error(''.join(traceback.format_tb(error.__traceback__)))
        return None

Пример #10

0

Показать файл

    def find_definitions(self):
        """
        Purpose
        ----------
        Look up definitions for the word held by the Class Definitions,
        preferring cached results and otherwise querying Merriam-Webster
        and synonym.com.

        Returns
        ----------
        :return: the deduplicated, sorted definitions shaped by the configured
                 output format -- a list for 'list', a ``{word: [definitions]}``
                 dict for 'dictionary', or a JSON string for 'json'. A
                 colorized message string is returned when the word is invalid
                 or nothing was found. None is returned for an unrecognized
                 output format.

        :rtype: list, dict, str or None
        """
        def _render(items):
            # Shape the definitions per the configured output format;
            # unrecognized formats yield None.
            output_format = self._output_format
            if output_format == 'list':
                return sorted(set(items))
            if output_format == 'dictionary':
                return {self._word: sorted(set(items))}
            if output_format == 'json':
                payload = {'definitions': {self._word: sorted(set(items))}}
                return json.dumps(payload, indent=4, ensure_ascii=False)
            return None

        if not self._validate_word():
            return _colorized_text(
                255, 0, 255,
                f'Please verify that the word {self._word} is spelled correctly.'
            )

        # NOTE(review): this definitions lookup reads the cache through
        # caching.cache_antonyms -- confirm that this is the intended cache.
        cache_state = caching.cache_antonyms(self._word)
        if cache_state[0] is True:
            cached_definitions = cleansing.flatten_multidimensional_list(
                cache_state[1])
            return _render(cached_definitions)

        if cache_state[0] is False:
            # _query_collins_dictionary() is not queried here because of
            # Cloudflare protection.
            source_results = (
                self._query_merriam_webster(),
                self._query_synonym_com(),
            )
            found = [result for result in source_results if result is not None]
            flattened = cleansing.flatten_multidimensional_list(found)
            if flattened:
                return _render(flattened)
            return _colorized_text(
                255, 0, 255,
                f'No definitions were found for the word: {self._word} \n'
                f'Please verify that the word is spelled correctly.')

        return None

Пример #11

0

Показать файл

Файл: synonyms.py Проект: lgs/wordhoard

def query_collins_dictionary_synonym(single_word):
    """
    Look up synonyms for the 'single_word' parameter on
    collinsdictionary.com, consulting the synonym cache first.
    :param single_word: string variable to search for
    :return: list of synonyms; None when the word fails validation,
             the site reports no results, or a handled error was logged
    """
    # Guard: reject words that do not pass the syntax check.
    if not word_verification.validate_word_syntax(single_word):
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(
            f'Please verify that the word {single_word} is spelled correctly.')
        return None

    # Guard: a cache hit is flattened and returned without any web request.
    cached_entry = caching.cache_synonyms(single_word, 'collins_dictionary')
    if cached_entry:
        return cleansing.flatten_multidimensional_list(
            list(cached_entry.values()))

    try:
        page_html = basic_soup.get_single_page_html(
            f'https://www.collinsdictionary.com/dictionary/english-thesaurus/{single_word}'
        )
        parsed_page = BeautifulSoup(page_html, 'lxml')
        missing_banner = parsed_page.find(
            'h1',
            text=
            f'Sorry, no results for “{single_word}” in the English Thesaurus.'
        )
        if missing_banner:
            logger.error(
                f'Collins Dictionary had no reference for the word {single_word}'
            )
            logger.error(
                f'Please verify that the word {single_word} is spelled correctly.'
            )
            return None

        synonym_block = basic_soup.query_html(page_html, 'div', 'class',
                                              'blockSyn')
        collected = []
        for node in synonym_block.descendants:
            if node.name != 'div':
                continue
            # NOTE(review): the second argument to get() is a fallback value,
            # not a filter, so this only skips divs whose class value is
            # falsy - confirm whether a class match was intended.
            if not node.get('class', 'form type-syn orth'):
                continue
            headword = node.findChild('span', {'class': 'orth'})
            if headword is not None:
                collected.append(headword.text)

        # The scraped (unsorted) list is cached; callers get a sorted copy.
        caching.insert_word_cache_synonyms(single_word, 'collins_dictionary',
                                           collected)
        return sorted(collected)
    except (bs4.FeatureNotFound, AttributeError, KeyError,
            TypeError) as error:
        # Pick the headline for the exception type, checked in the same
        # order the handlers were originally listed.
        if isinstance(error, bs4.FeatureNotFound):
            headline = 'A BeautifulSoup error occurred in the following code segment:'
        elif isinstance(error, AttributeError):
            headline = 'An AttributeError occurred in the following code segment:'
        elif isinstance(error, KeyError):
            headline = 'A KeyError occurred in the following code segment:'
        else:
            headline = 'A TypeError occurred in the following code segment:'
        logger.error(headline)
        logger.error(''.join(traceback.format_tb(error.__traceback__)))
    return None

Пример #12

0

Показать файл

Файл: synonyms.py Проект: lgs/wordhoard

def query_wordnet(single_word):
    """
    This function queries wordnet for synonyms
    related to the 'single_word' parameter.

    The synonym cache is consulted first. On a cache miss the WordNet web
    interface is requested, the collected entries are lowercased and sorted,
    written to the cache and returned.

    :param single_word: string variable to search for
    :return: list of synonyms; None when the word is not valid, the page
             has no 'Noun' heading, or a handled error was logged
    """
    valid_word = word_verification.validate_word_syntax(single_word)
    if valid_word:
        check_cache = caching.cache_synonyms(single_word, 'wordnet')
        if not check_cache:
            try:
                synonyms = []
                results = requests.get(
                    f'http://wordnetweb.princeton.edu/perl/webwn?s={single_word}',
                    headers=basic_soup.http_headers,
                    allow_redirects=True,
                    verify=True,
                    timeout=30)
                soup = BeautifulSoup(results.text, "lxml")
                if soup.findAll('h3', text='Noun'):
                    # Only the first <ul> on the page is scanned.
                    parent_node = soup.findAll("ul")[0].findAll('li')
                    for children in parent_node:
                        for child in children.find_all(href=True):
                            # Links whose first content item contains 'S:'
                            # are skipped.
                            if 'S:' not in child.contents[0]:
                                synonyms.append(child.contents[0])
                    synonyms = sorted(x.lower() for x in synonyms)
                    caching.insert_word_cache_synonyms(single_word, 'wordnet',
                                                       synonyms)
                    return synonyms
                else:
                    logger.error(
                        f'Wordnet had no reference for the word {single_word}')
                    logger.error(
                        f'Please verify that the word {single_word} is spelled correctly.'
                    )
            except IndexError as e:
                logger.error(
                    'A IndexError occurred in the following code segment:')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
            except requests.HTTPError as e:
                logger.error('A HTTP error has occurred.')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
            except requests.ConnectionError as e:
                # Log unconditionally, like the sibling handlers. Previously
                # the message was a bare string expression (never logged) and
                # the traceback was gated on an unrelated `requests.codes`
                # check.
                logger.error('Failed to establish a new connection')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
            except requests.Timeout as e:
                logger.error('A connection timeout has occurred.')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
            except requests.RequestException as e:
                logger.error('An ambiguous exception occurred.')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
        else:
            # Cache hit: flatten the stored values and return them as-is.
            synonyms = cleansing.flatten_multidimensional_list(
                list(check_cache.values()))
            return synonyms
    else:
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(
            f'Please verify that the word {single_word} is spelled correctly.')

Пример #13

0

Показать файл

Файл: synonyms.py Проект: lgs/wordhoard

def query_thesaurus_plus(single_word):
    """
    This function queries thesaurus.plus for synonyms
    related to the 'single_word' parameter.

    The synonym cache is consulted first. On a cache miss the noun-category
    page for the word is fetched, each 'action_pronounce' div's 'data-term'
    attribute is collected, and the normalized, sorted terms are written to
    the cache and returned.

    :param single_word: string variable to search for
    :return: list of synonyms; None when the word is not valid, the site
             returns its 404 page, or a handled error was logged
    """
    valid_word = word_verification.validate_word_syntax(single_word)
    if valid_word:
        check_cache = caching.cache_synonyms(single_word, 'thesaurus_plus')
        if not check_cache:
            try:
                results_synonym = basic_soup.get_single_page_html(
                    f'https://thesaurus.plus/synonyms/{single_word}/category/noun'
                )
                soup = BeautifulSoup(results_synonym, "lxml")
                no_word = soup.find('title', text='404. Page not found')
                if no_word:
                    logger.error(
                        f'thesaurus.plus has no reference for the word {single_word}'
                    )
                    logger.error(
                        f'Please verify that the word {single_word} is spelled correctly.'
                    )
                else:
                    terms = []
                    # The first <li> of the result list is skipped.
                    parent_node = soup.find('ul', {
                        'class': 'list paper'
                    }).findAll('li')[1:]
                    for children in parent_node:
                        for child in children.findAll(
                                'div', {'class': 'action_pronounce'}):
                            # Read the attribute directly instead of
                            # splitting str(child.attrs) on commas, which
                            # depended on attribute order and broke on terms
                            # containing commas or quotes.
                            term = child.attrs.get('data-term')
                            if term is not None:
                                terms.append(cleansing.normalize_space(term))
                    # Sort once after collection rather than re-sorting the
                    # whole list on every inner-loop iteration.
                    synonyms = sorted(terms)
                    caching.insert_word_cache_synonyms(single_word,
                                                       'thesaurus_plus',
                                                       synonyms)
                    return synonyms
            except IndexError as e:
                logger.error(
                    'A IndexError occurred in the following code segment:')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
            except requests.HTTPError as e:
                logger.error('A HTTP error has occurred.')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
            except requests.ConnectionError as e:
                # Log unconditionally, like the sibling handlers. Previously
                # the message was a bare string expression (never logged) and
                # the traceback was gated on an unrelated `requests.codes`
                # check.
                logger.error('Failed to establish a new connection')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
            except requests.Timeout as e:
                logger.error('A connection timeout has occurred.')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
            except requests.RequestException as e:
                logger.error('An ambiguous exception occurred.')
                logger.error(''.join(traceback.format_tb(e.__traceback__)))
        else:
            # Cache hit: flatten the stored values and return them as-is.
            synonyms = cleansing.flatten_multidimensional_list(
                list(check_cache.values()))
            return synonyms
    else:
        logger.error(f'The word "{single_word}" was not in a valid format.')
        logger.error(
            f'Please verify that the word {single_word} is spelled correctly.')

Пример #14

0

Показать файл

Файл: synonyms.py Проект: johnbumgarner/wordhoard

    def find_synonyms(self):
        """
        Purpose
        ----------
        Gather synonyms for the word held by this instance, from the cache
        when it reports a hit and otherwise from several online
        repositories, then return them deduplicated and sorted
        alphabetically in the configured output format.

        Returns
        ----------
        :returns:
            synonyms: a sorted list for the 'list' format, a dictionary
            keyed by the word for 'dictionary', or a JSON string for
            'json'; the result of _colorized_text() when the word is not
            valid or no synonyms were found; None for any other format

        :rtype: list, dict or str
        """
        if not self._validate_word():
            return _colorized_text(
                255, 0, 255,
                f'Please verify that the word {self._word} is spelled correctly.'
            )

        cache_state = self._check_cache()
        cache_hit = cache_state[0]

        if cache_hit is True:
            cached_words = cleansing.flatten_multidimensional_list(
                cache_state[1])

            def unique_sorted():
                # Cached entries are deduplicated and sorted as stored.
                return sorted(set(cached_words))

        elif cache_hit is False:
            # _query_collins_dictionary() disabled due to Cloudflare protection
            # Sources are queried in a fixed order; None results are dropped.
            source_results = (
                self._query_merriam_webster(),
                self._query_synonym_com(),
                self._query_thesaurus_com(),
                self._query_wordnet(),
            )
            gathered = cleansing.flatten_multidimensional_list(
                [found for found in source_results if found is not None])
            if not gathered:
                return _colorized_text(
                    255, 0, 255,
                    f'No synonyms were found for the word: {self._word} \n'
                    f'Please verify that the word is spelled correctly.')

            def unique_sorted():
                # Freshly queried entries are lowercased before deduplication.
                return sorted({entry.lower() for entry in gathered})

        else:
            # The cache flag is neither True nor False: nothing to return.
            return None

        # Deduplication is deferred until a recognised format is matched.
        requested_format = self._output_format
        if requested_format == 'list':
            return unique_sorted()
        if requested_format == 'dictionary':
            return {self._word: unique_sorted()}
        if requested_format == 'json':
            return json.dumps({'synonyms': {
                self._word: unique_sorted()
            }},
                              indent=4,
                              ensure_ascii=False)
        return None

Python flatten_multidimensional_list примеры использования