Пример #1
0
    def handle(self, *args, **options):
        """Print one Wordnik noun definition of 'child', then stop.

        NOTE(review): the bare ``return`` after the sample lookup makes the
        import loop over ``args`` below it unreachable. This looks like
        leftover debugging code -- confirm before relying on this method
        to import anything.
        """

        from wordnik import swagger, WordApi
        client = swagger.ApiClient(settings.WORDNIK_KEY, settings.WORDNIK_API)
        wordApi = WordApi.WordApi(client)
        # Hard-coded sample query: a single noun definition of 'child'.
        definitions = wordApi.getDefinitions(
            'child',
            partOfSpeech='noun',
            #sourceDictionaries='wiktionary',
            limit=1)
        for definition in definitions:
            print definition.text

        # Everything after this return is currently dead code.
        return
        total = len(args)
        word_i = 0
        # Filled in by utils.import_from_wordnik (passed by reference) and
        # reported once the loop is done.
        invalid_pos_names = set()
        # Words whose 1-based position is below skip_to are skipped.
        skip_to = 0
        for word_text in args:
            word_i += 1
            if word_i < skip_to:
                continue
            print '=' * 80
            print '%s (%i of %i)' % (word_text, word_i, total)

            utils.import_from_wordnik(word_text,
                                      invalid_pos_names=invalid_pos_names)

            #break
            # Pause for a random 1-5 seconds between words.
            dsecs = random.randint(1, 5)
            print 'Waiting for %i seconds...' % (dsecs, )
            time.sleep(dsecs)
        print '=' * 80
        print 'Invalid part-of-speech names:', sorted(invalid_pos_names)
Пример #2
0
def word_of_the_day():
    """Fetch Wordnik's word of the day with a definition and a pronunciation.

    Returns:
        A 3-tuple ``(word, definition_text, pronunciation)``.
        ``definition_text`` is the ``text`` of the first entry in the word's
        ``definitions`` list. ``pronunciation`` is the ``raw`` field of the
        first pronunciation entry when the lookup produced any; otherwise it
        is the falsy lookup result itself (``''`` when the request failed
        with an HTTP error).
    """
    client = swagger.ApiClient('***', 'http://api.wordnik.com/v4')
    todays_word = WordsApi.WordsApi(client).getWordOfTheDay()

    lookup = WordApi.WordApi(client)
    # The word of the day does not always have an IPA pronunciation; an HTTP
    # error from the lookup is treated as "no pronunciation available".
    try:
        pronunciations = lookup.getTextPronunciations(todays_word.word,
                                                      typeFormat='IPA')
    except urllib.error.HTTPError:
        pronunciations = ''

    # Indexing into ``pronunciations`` only happens when it is non-empty, so
    # the '' fallback from the except branch is never indexed.
    return (
        todays_word.word,
        todays_word.definitions[0].text,
        pronunciations[0].raw if pronunciations else pronunciations,
    )
0
    def seekdef(self, word):
        """Look up ``word`` on Wordnik, store its definitions and report.

        Each returned definition is saved as a ``Words`` record. A record
        that fails to save is printed and skipped so the remaining ones are
        still stored. The number of stored definitions and the text of the
        first stored one are sent through ``self.chat``.

        :param word: the word to look up; surrounding whitespace is stripped.
        """
        if not WORDNIK_API:
            self.chat("WORDNIK_API is not set.")
            return

        client = swagger.ApiClient(WORDNIK_API, 'http://api.wordnik.com/v4')
        wapi = WordApi.WordApi(client)
        # Treat a missing result (None) like an empty one so an unknown word
        # ends in "I got nothin." instead of a TypeError in the loop.
        results = wapi.getDefinitions(word.strip()) or []

        count = 0
        first_text = None

        for item in results:
            try:
                definition = Words(word=item.word,
                                   partofspeech=item.partOfSpeech,
                                   definition=item.text,
                                   source=item.sourceDictionary)
                definition.save()
            except Exception as e:
                # Best effort: one bad record must not abort the others.
                print(e)
                continue

            if count == 0:
                first_text = item.text
            count += 1

        if count > 0:
            self.chat("Wordnik coughed up " + str(count) + " definitions.")
            self.chat("Definition 1:" + first_text)
        else:
            self.chat("I got nothin.")
Пример #4
0
def get_examples(query, limit=5):
    """Return numbered usage examples for ``query`` as one string.

    Three lookups are attempted in order, stopping at the first non-empty
    result: a plain one, one with ``useCanonical``, and one with both
    ``useCanonical`` and ``includeDuplicates``. Returns ``''`` when all
    three come back empty. Facets of the result, if any, are printed
    (experimental) rather than returned.
    """
    wordapi = WordApi.WordApi(client)
    fallbacks = (
        {},
        {'useCanonical': 'true'},
        {'useCanonical': 'true', 'includeDuplicates': 'true'},
    )
    for extra_options in fallbacks:
        examples = wordapi.getExamples(query, limit=limit, **extra_options)
        if examples:
            break
    else:
        # Every attempt came back empty.
        return ''

    numbered = []
    for position, example in enumerate(examples.examples or [], start=1):
        numbered.append('%d: ' % position + example.text + '\n')

    # XXX: Experimental
    for position, facet in enumerate(examples.facets or [], start=1):
        print('\n%d: ' % position + facet.text)

    return ''.join(numbered)
Пример #5
0
 def checkConnection(self):
     """Report whether a test request to Wordnik succeeds.

     Builds a client from ``self.apiKey`` / ``self.apiUrl`` and requests the
     related words of 'test'.

     :return: True if the request completed without raising, else False.
     """
     try:
         client = swagger.ApiClient(self.apiKey, self.apiUrl)
         wordApi = WordApi.WordApi(client)
         wordApi.getRelatedWords('test')
         return True
     except Exception:
         # Any failure counts as "no connection". Unlike a bare ``except:``,
         # KeyboardInterrupt and SystemExit are still allowed to propagate.
         return False
Пример #6
0
        def _define():
            """Fetch definitions of the enclosing scope's ``word`` from Wordnik."""
            lookup_options = {
                'sourceDictionaries': wordnik_dictionaries,
                'includeRelated': True,
            }
            # NOTE(review): callers presumably need a fully materialized
            # result, not a lazy iterator -- confirm against the client.
            return wordapi.WordApi(self.client).getDefinitions(
                word, **lookup_options)
Пример #7
0
def get_top_example(query):
    """Return ``'Top Example: <text>'`` for ``query``, or ``''`` if none.

    The canonical form of the word is tried only when the plain lookup
    comes back empty.
    """
    wordapi = WordApi.WordApi(client)
    best = (wordapi.getTopExample(query)
            or wordapi.getTopExample(query, useCanonical='true'))
    if not best:
        return ''
    return 'Top Example: ' + best.text
Пример #8
0
 def defintion(cls, word: str):
     """
     Queries Wordnik's related-words lookup for a word.

     NOTE(review): despite the method name, this calls ``getRelatedWords``
     rather than ``getDefinitions`` -- confirm which one is intended.
     :param word: The word to look up.
     :return: The result of ``WordApi.getRelatedWords`` for the word.
     """
     api = WordApi.WordApi(
         swagger.ApiClient(cls.WORDNIK_API_KEY, cls.WORDNIK_API_URL))
     return api.getRelatedWords(word)
    def __init__(self, source_language: str, target_language: str, key: str, translator_name: str = 'Wordnik',
                 quality: int = 70,
                 service_name: str = 'Wordnik') -> None:
        """Set up a translator backed by the Wordnik word API.

        :param source_language: forwarded to the base-class initializer.
        :param target_language: forwarded to the base-class initializer.
        :param key: Wordnik API key used to build the API client.
        :param translator_name: forwarded to the base-class initializer.
        :param quality: forwarded to the base-class initializer. The default
            is the integer 70, matching the ``int`` annotation; it used to
            be the string ``'70'``.
        :param service_name: forwarded to the base-class initializer.
        """
        super(WordnikTranslator, self).__init__(
            source_language, target_language, translator_name, quality,
            service_name)

        self.key = key

        self.api_client = swagger.ApiClient(self.key, API_URL)
        self.word_api = WordApi.WordApi(self.api_client)
Пример #10
0
def get_hyphenation(query):
    """Return the hyphenation of ``query`` as ``'part - part - ...'``.

    A part that has a type is rendered as ``text(type)``. Returns ``''``
    when Wordnik has no hyphenation for the word.
    """
    hyphenation = WordApi.WordApi(client).getHyphenation(query)
    if not hyphenation:
        return ''

    pieces = []
    for part in hyphenation:
        piece = part.text
        if part.type:
            piece += '(' + part.type + ')'
        pieces.append(piece)
    return ' - '.join(pieces)
Пример #11
0
def related_wordnik(w, canonicform=True):
    """Map each Wordnik relationship type to the words related to ``w``.

    :param w: the word to look up.
    :param canonicform: passed to Wordnik as ``useCanonical``.
    :return: dict of ``relationshipType -> words``; empty when Wordnik has
        nothing for the word.
    """
    client = swagger.ApiClient(wordnik_key, wordnik_url)
    wordApi = WordApi.WordApi(client)
    # relationshipTypes / limitPerRelationshipType are deliberately not
    # passed, so every relationship type is requested.
    res = wordApi.getRelatedWords(w, useCanonical=canonicform)

    # A missing result (None or empty) yields an empty dict instead of a
    # TypeError from iterating None. The string sentinel check is kept
    # from the original code.
    if not res or res == 'null getRelatedWords result':
        return dict()

    return {related.relationshipType: related.words for related in res}
Пример #12
0
def get_definition_api(query):
    """Print Wordnik definitions of ``query`` grouped by source dictionary.

    The exact word is looked up first; the canonical form is tried only if
    that finds nothing. Output is a header line with the word and one
    pronunciation, then a numbered heading each time the source dictionary
    changes, then each definition prefixed by the first letter of its part
    of speech, and finally the top usage example. Always returns ``''``.
    """
    wordapi = WordApi.WordApi(client)
    print('Searching on the Internet for %s:' % query)

    shared_options = dict(sourceDictionaries='all',
                          includeRelated='true',
                          includeTags='false')
    definitions = wordapi.getDefinitions(query,
                                         useCanonical='false',
                                         **shared_options)
    if not definitions:
        definitions = wordapi.getDefinitions(query,
                                             useCanonical='true',
                                             **shared_options)
    if not definitions:
        print('Sorry, nothing found')
        return ''
    print(definitions[0].word, get_pronunciation(query, 1), ':')

    # Short display names for known source dictionaries; any other source is
    # shown under its raw identifier.
    display_names = {
        'gcide': 'GNU CIDE',
        'ahd-legacy': 'American Heritage',
        'wiktionary': 'Wiktionary',
        'century': 'Century',
        'wordnet': 'WordNet',
    }

    last_source = ''
    heading_number = 0
    for entry in definitions:
        source = entry.sourceDictionary
        if source != last_source:
            # A new heading starts whenever the source changes.
            heading_number += 1
            print('%d: %s -->' % (heading_number,
                                  display_names.get(source, source)))
            last_source = source
        pos_tag = ('(%s)' % entry.partOfSpeech[0]) if entry.partOfSpeech else '( )'
        print(pos_tag, end='')
        print(entry.text)
    print('\n', get_top_example(query))
    return ''
Пример #13
0
def get_pronunciation(query, send_one=0):
    """Return or print the text pronunciations of ``query``.

    With a truthy ``send_one`` the ``raw`` text of the first pronunciation
    is returned. Otherwise the hyphenation and every pronunciation are
    printed and ``None`` is returned. Returns ``''`` when neither the plain
    nor the canonical lookup finds anything.
    """
    wordapi = WordApi.WordApi(client)
    found = (wordapi.getTextPronunciations(query)
             or wordapi.getTextPronunciations(query, useCanonical='true'))
    if not found:
        return ''
    if send_one:
        return found[0].raw

    print(get_hyphenation(query))
    for entry in found:
        print(entry.raw)
    return None
Пример #14
0
 def __init__(self):
     """Create the Wordnik API wrappers, the caches and the HTTP session."""
     # Each API wrapper gets its own ApiClient instance.
     word_client = swagger.ApiClient(wordnik_api_key, wordnik_api)
     self.word_api = WordApi.WordApi(word_client)
     words_client = swagger.ApiClient(wordnik_api_key, wordnik_api)
     self.wordoftheday_api = WordsApi.WordsApi(words_client)

     self.urbandictionary_api = urbandictionary_api

     self.dictionaryCache = LFUCache(maxsize=1000)
     self.urbandictionaryCache = LFUCache(maxsize=1000)
     self.wordOfTheDayCache = {}

     # One adapter per URL scheme, each configured with max_retries=5.
     self.session = requests.Session()
     for scheme in ('http://', 'https://'):
         self.session.mount(scheme,
                            requests.adapters.HTTPAdapter(max_retries=5))
Пример #15
0
def get_synonyms(word):
    """Return ``word`` plus its Wordnik synonyms and equivalents, deduplicated.

    :param word: the word to look up.
    :return: a list (in no particular order) containing ``word`` itself and
        every word Wordnik lists under the 'equivalent' or 'synonym'
        relationship types.
    """
    api_url = 'http://api.wordnik.com/v4'
    # SECURITY NOTE(review): this API key is hard-coded in source. It should
    # be moved to configuration or the environment, and rotated.
    api_key = '495685498a8807c1d60070b8cd908c4dd54326674bcc6ddb9'
    client = swagger.ApiClient(api_key, api_url)
    word_api = WordApi.WordApi(client)
    related_words = word_api.getRelatedWords(word,
                                             limitPerRelationshipType=100)

    # The builtin set replaces the legacy ``Set`` class; the union prevents
    # duplicates.
    synonyms = {word}

    # The lookup result is only iterated if it is iterable at all.
    if isiterable(related_words):
        for word_group in related_words:
            if word_group.relationshipType in ('equivalent', 'synonym'):
                synonyms.update(word_group.words)

    return list(synonyms)
def relatedwords(word):
    """Return Wordnik's related-word groups for ``word``.

    At most 10 words are requested per relationship type.

    :param word: the word to look up.
    :return: the result of ``WordApi.getRelatedWords``.
    """
    client = swagger.ApiClient(apiKey, apiUrl)
    wordApi = WordApi.WordApi(client)

    # getRelatedWords has no ``limit`` parameter; the per-type cap is
    # ``limitPerRelationshipType``.
    res = wordApi.getRelatedWords(word, limitPerRelationshipType=10)
    return res
Пример #17
0
 def apiCall(self, word, apiLang):
     """Return Wordnik's related words for ``word``.

     ``apiLang`` is accepted but not used by this implementation.
     """
     api = WordApi.WordApi(swagger.ApiClient(self.apiKey, self.apiUrl))
     return api.getRelatedWords(word)
Пример #18
0
    def crawl_wordnik(self,
                      vocab,
                      api_key,
                      corenlp_url,
                      call_quota=15000,
                      crawl_also_lowercase=False,
                      crawl_also_lemma=False):
        """Download and preprocess definitions from Wordnik.

        Fetched definitions are tokenized and stored in ``self._data``; the
        source dictionary of each definition is recorded in
        ``self._meta_data``. Progress is saved periodically and once more
        at the end.

        vocab
            Vocabulary for which the definitions should be found.
        api_key
            The API key to use in communications with Wordnik.
        corenlp_url
            URL handed to ``StanfordCoreNLP``; that client is used to
            tokenize the definition texts.
        call_quota
            Maximum number of calls per hour.
        crawl_also_lowercase
            If true will add lowercase version of each word to crawl list
        crawl_also_lemma
            If true will also crawl lemma versions of words
            WARNING: Lemma of Cat is Cat! So if you want to have definition of "cat"
            you have to also pass crawl_also_lowercase!

        """
        corenlp = StanfordCoreNLP(corenlp_url)

        self._remaining_calls = call_quota
        self._last_saved = 0

        client = swagger.ApiClient(api_key, 'https://api.wordnik.com/v4')
        self._word_api = WordApi.WordApi(client)
        self._account_api = AccountApi.AccountApi(client)

        words = list(vocab.words)

        # Note(kudkudak): for SNLI it adds 3k words
        if crawl_also_lowercase:
            words_set = set(words)  # For efficiency

            logger.info("Adding lowercase words to crawl")
            lowercased = []
            for w in words:
                if w.lower() not in words_set:
                    lowercased.append(w.lower())
            logger.info("Crawling additional {} words".format(len(lowercased)))
            words.extend(sorted(lowercased))

        # Note(kudkudak): for SNLI it adds 2k words, so we can expect
        # like sabotage,sabotaging
        # Note that lemma crawling is *after* lowercasing
        if crawl_also_lemma:
            words_set = set(words)  # For efficiency

            logger.info("Adding lemmatized vrsions to crawl")
            lemmas = []
            original = []
            lemmatizer = nltk.WordNetLemmatizer()
            for w in words:
                if isinstance(w, str):
                    w = w.decode('utf-8')

                for part_of_speech in ['a', 's', 'r', 'n', 'v']:
                    lemma = lemmatizer.lemmatize(w, part_of_speech)
                    if lemma not in words_set:
                        lemmas.append(lemma)
                        original.append(w)
            logger.info("Crawling additional {} words".format(len(lemmas)))
            # Log a random sample of (lemma, original) pairs. Sampling is
            # skipped when there are no lemmas, because numpy.random.choice
            # raises ValueError on an empty population.
            if lemmas:
                for idx in numpy.random.choice(len(lemmas), 100):
                    logger.info("Example:" + lemmas[idx] + "," + original[idx])
            # Extend the crawl list exactly once. This used to sit inside
            # the sampling loop and appended the lemmas 100 times.
            words.extend(sorted(lemmas))

        # No stemming is applied here: stemming is useless because the
        # dictionary is indexed with lemmas, not stems. Lemmatizers, on the
        # other hand, can not be fully trusted when it comes to unknown
        # words, which is why lemma crawling above is opt-in.
        for word in words:
            if isinstance(word, str):
                word = word.decode('utf-8')

            if word in self._data:
                logger.debug(u"a known word {}, skip".format(word))
                continue

            if self._last_saved >= _SAVE_EVERY_CALLS:
                self.save()
                self._last_saved = 0

            # Keep a safety margin of calls, I don't want to DDoS Wordnik :)
            if self._remaining_calls < _MIN_REMAINING_CALLS:
                self._wait_until_quota_reset()
            try:
                # NOTE(kudkudak): We fetch all dictionaries, but retrieval can filter them based on meta info
                definitions = self._word_api.getDefinitions(word)
            except Exception:
                logger.error(u"error during fetching '{}'".format(word))
                logger.error(traceback.format_exc())
                continue
            self._remaining_calls -= 1
            self._last_saved += 1

            if not definitions:
                definitions = []
            self._data[word] = []
            for def_ in definitions:
                # Bound before the try so the error handler can always
                # report it.
                text = None
                try:
                    # seems like definition text can be both str and unicode
                    text = def_.text
                    if def_.text is None:
                        continue
                    if isinstance(text, str):
                        text = text.decode('utf-8')
                    tokenized_def = corenlp.tokenize(text)[0]
                    self._data[word].append(tokenized_def)
                    # Note(kudkudak): I don't think there is much more useful meta data for us
                    # Note(kudkudak): This might seem strange, but I am afraid this is most robust (least bug prone)
                    # way of storing meta data that doesn't require rewriting dict storage format
                    self._meta_data[" ".join(tokenized_def)] = {
                        "sourceDictionary": def_.sourceDictionary
                    }
                except Exception:
                    # repr() keeps the log call itself from raising on
                    # non-ASCII text, whether it is bytes or unicode.
                    logger.error("error during tokenizing {!r}".format(text))
                    logger.error(traceback.format_exc())
            logger.debug(u"definitions for '{}' fetched {} remaining".format(
                word, self._remaining_calls))
        self.save()
        self._last_saved = 0
Пример #19
0
import os

from wordnik import swagger, WordApi, WordsApi
import tweepy

import secret

# Twitter credentials are read from the environment; a missing variable
# raises KeyError at import time.
TWITTER_CONSUMER_KEY = os.environ['TWITTER_CONSUMER_KEY']
TWITTER_CONSUMER_SECRET = os.environ['TWITTER_CONSUMER_SECRET']
TWITTER_ACCESS_KEY = os.environ['TWITTER_ACCESS_KEY']
TWITTER_ACCESS_SECRET = os.environ['TWITTER_ACCESS_SECRET']
# Module-level tweepy client, created with wait_on_rate_limit enabled.
auth = tweepy.OAuthHandler(TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET)
auth.set_access_token(TWITTER_ACCESS_KEY, TWITTER_ACCESS_SECRET)
api = tweepy.API(auth, wait_on_rate_limit=True)

# Wordnik: one ApiClient is shared by the word-level (WordApi) and the
# words-level (WordsApi) wrappers. The key also comes from the environment.
WORDNIK_ACCESS_URL = 'https://api.wordnik.com/v4'
WORDNIK_ACCESS_KEY = os.environ['WORDNIK_ACCESS_KEY']
client = swagger.ApiClient(WORDNIK_ACCESS_KEY, WORDNIK_ACCESS_URL)
wordAPI = WordApi.WordApi(client)
wordsAPI = WordsApi.WordsApi(client)
Пример #20
0
import re
import pyfscache
from nltk.corpus import wordnet
from nltk.stem.snowball import SnowballStemmer
import wikipedia
from wordnik import swagger, WordApi
from vocabulary.vocabulary import Vocabulary as vb

# On Python 2 only: reload sys and switch the default string encoding to UTF-8.
# NOTE(review): neither `sys` nor `string` (used for PUNC below) is imported
# in the visible part of this file -- confirm they are imported elsewhere.
if (sys.version_info < (3, 0)):
    reload(sys)
    sys.setdefaultencoding('utf8')

# Module-level Wordnik client.
# SECURITY NOTE(review): the API key below is hard-coded in source. It should
# be moved to configuration or the environment, and rotated.
apiUrl = 'http://api.wordnik.com/v4'
apiKey = 'a1b28252f1c2bd049897a03d4e81e85c5d6dbca71cb8dcac8'
client = swagger.ApiClient(apiKey, apiUrl)
wordApi = WordApi.WordApi(client)
stemmer = SnowballStemmer("english")

# File-system cache rooted at data/cache/; rate limiting is switched on for
# the wikipedia module.
fs_cache = pyfscache.FSCache('data/cache/')
wikipedia.set_rate_limiting(True)

DEBUG = False
# Set of ASCII punctuation characters.
PUNC = set(string.punctuation)


def clean_str(string):
    """
    Cleans a str by making it all lower case, removing punctuation, and removing any html

    Args:
        string: the str to clean
Пример #21
0
        pronunciationType = 'none'

    return pronunciation, pronunciationType


# Function to remove punctuation from a string.
def removePunctuation(s):
    """Return ``s`` with all ASCII punctuation removed except apostrophes.

    Apostrophes are kept because removing them would affect the look-up of
    contractions.
    """
    strippable = ''.join(ch for ch in string.punctuation if ch != '\'')
    deletion_table = str.maketrans('', '', strippable)
    return s.translate(deletion_table)


# Create client and WordApi objects.
client = swagger.ApiClient(wordnikKey, wordnikUrl)
my_dict = WordApi.WordApi(client)

# Input and output filenames.
text_filename = './input.txt'
output_filename = './output.txt'
look_up_filename = './CMU.in.IPA.txt'

# Open files for reading and writing.
# NOTE(review): the output file is opened in binary mode ('wb'), so writes
# must be bytes. None of these handles is closed in the visible part of the
# script -- confirm they are closed further down.
text = open(text_filename, 'r')
output_text = open(output_filename, 'wb')
look_up_text = open(look_up_filename, 'r')

# Pronunciation formats supported
acceptable_formats = ['ahd', 'ipa']

# Check command line arguments. If incorrect, exit program.
Пример #22
0
# Module-level service clients, all created at import time from values in
# the `credentials` module.
aiohttp_session = aiohttp.ClientSession()
clarifai_app = clarifai.rest.ClarifaiApp(
    app_id=credentials.clarifai_api_id,
    app_secret=credentials.clarifai_api_secret)
clarifai_general_model = clarifai_app.models.get("general-v1.3")
clarifai_nsfw_model = clarifai_app.models.get("nsfw-v1.0")
inflect_engine = inflect.engine()
owm_client = pyowm.OWM(credentials.owm_api_key)
twitter_auth = tweepy.OAuthHandler(credentials.twitter_consumer_key,
                                   credentials.twitter_consumer_secret)
twitter_auth.set_access_token(credentials.twitter_access_token,
                              credentials.twitter_access_token_secret)
twitter_api = tweepy.API(twitter_auth)
# One Wordnik ApiClient is shared by the WordApi and WordsApi wrappers.
wordnik_client = swagger.ApiClient(credentials.wordnik_apikey,
                                   "http://api.wordnik.com/v4")
wordnik_word_api = WordApi.WordApi(wordnik_client)
wordnik_words_api = WordsApi.WordsApi(wordnik_client)
wolfram_alpha_client = wolframalpha.Client(credentials.wolframalpha_appid)
# Placeholders; nothing in the visible code assigns them a real value.
application_info = None
harmonbot_listener = None
# TODO: Include owner variable for user object?
# Sets the interpreter's recursion limit to 5000.
sys.setrecursionlimit(5000)

# NOTE(review): if the Imgur client fails to load, the error is only
# printed and `imgur_client` stays undefined -- later uses would raise
# NameError; confirm callers guard against that.
try:
    imgur_client = imgurpython.ImgurClient(credentials.imgur_client_id,
                                           credentials.imgur_client_secret)
except imgurpython.helpers.error.ImgurClientError as e:
    print("Discord Harmonbot: Failed to load Imgur Client: {}".format(e))

aiml_predicates = {
    "name": "Harmonbot",
Пример #23
0
 def __init__(self, bot):
     """Keep a reference to ``bot`` and build the Wordnik client from its token.

     The API key is read from ``bot.tokens['wordnik']``.
     """
     self.bot = bot
     self.apiUrl, self.apiKey = 'http://api.wordnik.com/v4', self.bot.tokens['wordnik']
     self.client = swagger.ApiClient(self.apiKey, self.apiUrl)
     self.wordApi = WordApi.WordApi(self.client)
def definition(word):
    """Return the texts of up to three Wordnik definitions of ``word``.

    :param word: the word to define.
    :return: list of definition texts; empty when Wordnik returns nothing.
    """
    client = swagger.ApiClient(apiKey, apiUrl)
    wordApi = WordApi.WordApi(client)

    # Treat a missing result (None) as "no definitions" so an unknown word
    # gives [] instead of a TypeError.
    res = wordApi.getDefinitions(word, limit=3) or []
    return [result.text for result in res]
Пример #25
0
 def __init__(self):
     """Load the Wordnik token from the config file and build the API client."""
     token = configfiles.get_config_data(config_file)
     wordnik_client = swagger.ApiClient(token, wordnik_endpoint)
     self._token = token
     self.api_client = wordnik_client
     self.api = WordApi.WordApi(wordnik_client)
# wordniktesting.py
# Just a file to test the output of some Wordnik API calls.
from wordnik import WordApi as wap
from wordnik import swagger
from config import myapikey # Delete this after you insert your key
# Shared Wordnik client used by every ig_* helper below.
apiUrl = 'http://api.wordnik.com/v4'
apiKey = myapikey # <----- Insert your api key here
client = swagger.ApiClient(apiKey, apiUrl)

wordApi = wap.WordApi(client)
# The single word every helper queries.
TEST_WORD = 'scruple'
def ig_defns():
    """Print the count and the raw list of Wiktionary definitions of TEST_WORD.

    At most ten definitions are requested.
    """
    found = wordApi.getDefinitions(TEST_WORD,
                                   limit=10,
                                   sourceDictionaries='wiktionary')
    print(len(found))
    print(found)

def ig_examples():
    """Print the text of each usage example of TEST_WORD (ten requested)."""
    response = wordApi.getExamples(TEST_WORD, limit=10)
    for example in response.examples:
        print(example.text)

def ig_relwords():
    """Print the second related-words group of TEST_WORD, whole then per word.

    NOTE(review): the group is picked by position (index 1), which the
    original author observed to be the synonyms; the relationship type is
    not checked.
    """
    second_group_words = wordApi.getRelatedWords(TEST_WORD)[1].words
    print(second_group_words)
    for rel in second_group_words:
        print(rel)

def ig_pronunc():
    """Print the raw text of every pronunciation of TEST_WORD."""
    for entry in wordApi.getTextPronunciations(TEST_WORD):
        print(entry.raw)