from wiktionaryparser import WiktionaryParser


def get_info_from_wiktionary(one_word):
    parser = WiktionaryParser()

    # fetch() returns a list of entries; print the definitions of the first one
    word = parser.fetch(one_word, 'german')

    definitions = word[0]['definitions']
    print(definitions)
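A minimal usage sketch, assuming the snippet above is in scope and the word has a German entry:

get_info_from_wiktionary('Haus')
# prints a list of definition dicts, e.g. [{'partOfSpeech': 'noun', 'text': [...], 'relatedWords': [...], 'examples': [...]}]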
Example No. 2
class WikiTagger:
  
  def __init__(self):
    self.wp = WiktionaryParser()
    self.wp.set_default_language('portuguese')

  def fetch(self, tokens):
    
    for x in tokens:
      if x.pos == 'TEMP':  # or isinstance(x.pos, list):
        try:
          # parts = x.pos if isinstance(x.pos, list) else []  # persistence
          parts = []
          p = self.wp.fetch(x.symbol)
          for k in p:
            for y in k['definitions']:
              # the first text line usually carries the headword plus gender markers ("m"/"f")
              gender = ''
              if 'm' in y['text'][0].split():
                gender = 'MASC'
              elif 'f' in y['text'][0].split():
                gender = 'FEMI'
              else:
                gender = 'DESC'
              coiso = PartOfSpeech(y['partOfSpeech'], gender, "TEMP")
              parts.append(coiso)
          if parts:
            x.pos = parts
          else:
            x.pos = [PartOfSpeech('proper noun','DESC','TEMP'),]
          #x.pos = wp.fetch(x.symbol)[0]['definitions'][0]['partOfSpeech']
        except Exception as e:
          print(e)
          x.pos = "ERROR"
    return tokens
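A rough usage sketch; Token below is a hypothetical stand-in for the project's real token type (anything exposing pos and symbol attributes), and PartOfSpeech must come from the original project:

class Token:                          # hypothetical token type, for illustration only
    def __init__(self, symbol):
        self.symbol = symbol
        self.pos = 'TEMP'

tagger = WikiTagger()
tagged = tagger.fetch([Token('gato')])
print(tagged[0].pos)                  # a list of PartOfSpeech objects, or "ERROR" on failure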
Example No. 3
def obtener_tipo_palabra(palabra):
    """
    Valida que la palabra exista. En caso afirmativo, retorna el tipo
    de palabra (Adjetivo, sustantivo o verbo). Caso negativo, retorna
    vacío.
    
    Args:
        palabra (str): Palabra a validar.

    Returns:
        Tipo (str): Tipo de palabra, siendo estos 'A' para Adjetivo,
                    'S' para Sustantivo y 'V' para Verbo.
                    En caso de que la palabra no exista, retorna vacío.
    """

    try:

        parser = WiktionaryParser()
        parser.set_default_language('spanish')

        word = parser.fetch(palabra)

        tipo_palabra = word[0]['definitions'][0]['partOfSpeech']

        if tipo_palabra == 'adjective':
            return ADJETIVOS
        elif tipo_palabra == 'noun':
            return SUSTANTIVOS
        elif tipo_palabra == 'verb':
            return VERBOS
        else:
            return ''

    except IndexError:
        return ''
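ADJETIVOS, SUSTANTIVOS and VERBOS are constants defined elsewhere in the original module; going by the docstring they presumably hold 'A', 'S' and 'V'. A quick sketch:

ADJETIVOS, SUSTANTIVOS, VERBOS = 'A', 'S', 'V'   # assumed values, per the docstring
print(obtener_tipo_palabra('correr'))   # 'V' when Wiktionary tags the word as a verb
print(obtener_tipo_palabra('zzxyq'))    # '' when the word cannot be found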
Example No. 4
def is_noun(word, country_code):
    """
    Takes a word and a country code and returns True if the word is a noun, False otherwise.
    Args:
        word: String
        country_code: String

    Returns: Boolean

    """
    if country_code == 'EN':
        for tmp in nltk.corpus.wordnet.synsets(word):
            if tmp.name().split('.')[0] == word and tmp.name().split(
                    '.')[1] == 'n':
                return True
        # only report False after every synset has been checked
        return False
    if country_code == 'PL':
        parser = WiktionaryParser()
        try:
            another_word = parser.fetch(
                word, 'polish')[0]['definitions'][0]['partOfSpeech']
            return another_word == "noun"
        except Exception:
            return False
    return False
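Usage sketch; the English branch needs nltk with the wordnet corpus downloaded, the Polish branch queries Wiktionary:

print(is_noun('dog', 'EN'))   # True
print(is_noun('kot', 'PL'))   # True if the first Polish definition is tagged as a noun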
Example No. 5
    def getDef(word):
        dictionary = PyDictionary()
        definition = dictionary.meaning(word)
        if isinstance(definition, dict) and 'Noun' in definition:
            defs = definition['Noun']
            if isinstance(defs, list) and len(defs) > 0:
                return defs

        # wordnik dictionary
        wordApi = WordApi.WordApi(client)
        definitions = (wordApi.getDefinitions(word,
                                              partOfSpeech='noun',
                                              limit=3))
        if definitions is not None and len(definitions) > 0:
            return [(definition.text).lower() for definition in definitions]
        meaningsList = vocabulary.meaning(word)
        if meaningsList != False:
            defs = json.loads(meaningsList)
            if (len(defs) > 0):
                definitions = []
                for definition in defs:
                    if definition['text']:
                        d = re.sub('<[^<]+?>', '', definition['text'])
                        definitions.append(d.lower())
                if len(definitions) > 0:
                    return definitions
        # owlbot api
        url = 'https://owlbot.info/api/v2/dictionary/' + word
        r = requests.get(url)
        if r is not None:
            try:
                result = r.json()
                if len(result) > 0:
                    definitions = []
                    for item in result:
                        if (item['type'] == 'noun' and item['definition']):
                            definitions.append(item['definition'].lower())
                    if len(definitions) > 0:
                        return definitions
            except:
                pass
        # wiktionary
        try:
            parser = WiktionaryParser()
            result = parser.fetch(word)
            if result is not None:
                definition = result[0]['definitions']
                if definition and len(definition) > 0:
                    definition = definition[0]
                    if 'partOfSpeech' in definition:
                        if definition['partOfSpeech'] == 'noun':
                            defs = definition['text'].lower().split('\n')
                            if len(defs) > 1:
                                return defs[0:2]
                            elif len(defs) == 1:
                                return defs
        except:
            return ' '
        return ' '
Example No. 6
def get_ipa(word):
    from wiktionaryparser import WiktionaryParser
    parser = WiktionaryParser()
    another_word = parser.fetch(word, 'french')
    try:
        print(another_word[0]['pronunciations']['text'][0])
    except IndexError:
        print("WIKI ERROR: THE INDEX IS OUT OF RANGE")
 async def run(self, message: discord.Message, trigger: str,
               client: discord.Client):
     word = message.content[len(trigger):]  # the word that follows the trigger
     parser = WiktionaryParser()
     fetched = parser.fetch(
         word, language="english"
     )  # the language argument could be omitted, since English is the default
     print(fetched)
Example No. 8
	def __init__(self, bot):
		self.bot = bot

		self.parser = WiktionaryParser()
		self.words = {}
		self.output = None
		self.embed = None

		self.parser.set_default_language('english')
Example No. 9
def get_etymology_trees(word_str: str, language: str) -> List[Word]:
    parser = WiktionaryParser()
    word_info = parser.fetch(word_str, language)
    word_origins: List[Word] = []

    for origin in word_info:
        word = Word(word_str, language)
        parse(origin['etymology'])
        # presumably the freshly built Word should be collected here; without it the
        # function always returns an empty list
        word_origins.append(word)
    return word_origins
Example No. 10
def requestAPI(word="test"):
    """ Query wiktionary API for json results using WiktionaryParser
    Args:
        word: queried word; default value, "test".
    Returns:
        the queried word, and the parsed entry data for it
    """
    parser = WiktionaryParser()
    word_json = parser.fetch(word)

    return word, word_json
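A quick call showing the tuple that comes back:

queried, data = requestAPI('tree')
print(queried)                                       # 'tree'
print(data[0]['definitions'][0]['partOfSpeech'])     # e.g. 'noun'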
def get_wiktionary_entry(language, word):
    """Interface to the requestion something from wiktionary.
    Arguments:
        language = language of which we want the entry
        word = word  of which we want the entry
    Returns:
        parsed wiktionary page
    """
    parser = WiktionaryParser()
    parser.set_default_language(language)
    return parser.fetch(word)
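Example call, assuming the language name matches Wiktionary's naming:

entry = get_wiktionary_entry('dutch', 'fiets')
print(entry[0]['definitions'][0]['partOfSpeech'])   # e.g. 'noun'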
def define_word(word):
    parser = WiktionaryParser()
    json_word = parser.fetch(word, 'yiddish')
    d = []
    try:  # error handling for when WiktionaryParser returns an empty list
        d = json_word[0]
        return d["definitions"]
    except:
        #print("d="+str(d))
        #print(type(d))
        #print("json_word="+str(json_word))
        #print(type(json_word))
        return False
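Sketch of how the return value is meant to be consumed (False on failure, otherwise the list of definition dicts):

result = define_word('קאַץ')   # any word with a Yiddish entry
if result:
    print(result[0]['partOfSpeech'], result[0]['text'])
else:
    print('no Yiddish entry found')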
Example No. 13
 def _scrape_parser_info(cls, idiom_id: str) -> Optional[dict]:
     logger = logging.getLogger("_scrape_parser_1_info")
     # using this wiktionary parser
     parser = WiktionaryParser()
     # include alternative forms as well (e.g. beat around the bush = beat about the bush)
     parser.include_relation('alternative forms')
     try:
         idiom_info = parser.fetch(idiom_id)
     except AttributeError as ae:
         logger.warning(str(ae))
         return None
     else:
         return idiom_info
Example No. 14
def definition_english(buff):
    from wiktionaryparser import WiktionaryParser
    import json
    parser = WiktionaryParser()
    word = json.dumps(parser.fetch(buff),sort_keys=True, indent=4)
    data = json.loads(word)
    c = ""
    try:
        for i in data[0]["definitions"][0]["text"]:
            c += i + "\n"
        return c
    except IndexError:
        return
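Example call; the function returns None when the lookup comes back empty:

print(definition_english('serendipity'))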
Example No. 15
    def query(self, word):
        """A method to retrieve Wiktionary data online.

        A wrapper for `WiktionaryParser
        <https://www.github.com/Suyash458/WiktionaryParser>`_.

        Args:
            word (str): word to be queried.

        Returns:
            a list of Wiktionary data points

        Todo:
            Find the specific error that is thrown when too many
            requests are made in parallel.
        """

        #convert from language code to canonical name for Wiktionary parser
        language = lookup_language_by_code(self._language)

        #set language
        parser = WiktionaryParser()
        parser.set_default_language(language)

        # @functools.lru_cache(maxsize=config["cache_size"])
        def retrieve_wikidata(word, parser=parser, silent=True):
            """Helper for querying wiktionaryparser and rasing appropriate
            exceptions."""
            try:
                res = parser.fetch(word)
                return res
            except TypeError:
                if not silent:
                    print("Query '" + word +
                          "' failed. It is probably missing in "
                          "Wiktionary.")
                return None
            except requests.exceptions.ConnectionError:
                print("Query '" + word + "' failed. LDT couldn't reach "
                      "Wiktionary. Your connection may be down or refused "
                      "by the server.")
                return None

        if not self.cache:
            return retrieve_wikidata(word)

        else:
            if word not in self.cache:
                return None
            else:
                return retrieve_wikidata(word)
Example No. 16
def get_wiki_tags(entry):
    convert = {'adjective': 'ADJ', 'adposition': 'ADP', 'preposition': 'ADP',
               'adverb': 'ADV', 'auxiliary': 'AUX', 'coordinating conjunction': 'CCONJ',
               'determiner': 'DET', 'interjection': 'INTJ', 'noun': 'NOUN',
               'numeral': 'NUM', 'particle': 'PART', 'pronoun': 'PRON', 'proper noun': 'PROPN',
               'punctuation': 'PUNCT', 'subordinating conjunction': 'SCONJ', 'symbol': 'SYM',
               'verb': 'VERB', 'other': 'X', 'article': 'DET', 'conjunction': 'PART'}
    # ADJ: adjective
    # ADP: adposition
    # ADV: adverb
    # AUX: auxiliary
    # CCONJ: coordinating conjunction
    # DET: determiner
    # INTJ: interjection
    # NOUN: noun
    # NUM: numeral
    # PART: particle
    # PRON: pronoun
    # PROPN: proper noun
    # PUNCT: punctuation
    # SCONJ: subordinating conjunction
    # SYM: symbol
    # VERB: verb
    # X: other

    parser = WiktionaryParser()
    words = entry[1][0]
    wiktionary_tags = []

    for word in words:
        wiki_pos = 'X'
        try:
            results = parser.fetch(word, 'greek')  #[0]['definitions']
            if results:
                if results[0]['definitions']:
                    for wiki_idx in range(len(results[0]['definitions'])):
                        wiki_pos = results[0]['definitions'][wiki_idx][
                            'partOfSpeech']
                        if wiki_pos in convert:
                            wiki_pos = convert[wiki_pos]
                            break
                        else:
                            print(f"** can't convert wiki_pos: {wiki_pos}")
                            print(word)
                            print('--------------------------------------')
                            wiki_pos = 'X'
        except AttributeError as error:
            print(f'Error: {error}')
        wiktionary_tags.append(wiki_pos)
    return wiktionary_tags
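get_wiki_tags only reads entry[1][0], which it expects to be a list of word strings; a purely illustrative sketch (the entry shape here is hypothetical):

entry = (None, [['γάτα', 'και', 'σκύλος']])   # hypothetical entry structure
print(get_wiki_tags(entry))   # one UPOS-style tag per word, 'X' for anything Wiktionary can't map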
Example No. 17
class Wiktionary:

	def __init__(self, bot):
		self.bot = bot

		self.parser = WiktionaryParser()
		self.words = {}
		self.output = None
		self.embed = None

		self.parser.set_default_language('english')

	def __fetch_word(self, word):
		self.words = self.parser.fetch(word)

	@commands.group(brief='Gives you a word\'s etymology, definition, examples etc.')
	async def word(self, ctx):
		pass

	@word.command(brief='Changes the language the command will use.')
	async def lang(self, ctx, lang):
		self.parser.set_default_language(lang)
		language_list = 'https://en.wiktionary.org/wiki/Wiktionary:List_of_languages'
		await ctx.send(f'Language changed to {lang}.\nThe list of languages can be found here: {language_list}')

	@word.command(brief='Gives you a word\'s etymologies.', aliases=['e', 'ety'])
	async def etymology(self, ctx, word):
		self.__fetch_word(word)
		title = word
		description = f'{len(self.words)} results found.'
		self.embed = discord.Embed(color=ctx.message.author.color, title=title, description=description)
		for i, word in enumerate(self.words[:3], 1):
			self.embed.add_field(name=i, value=word['etymology'])
		await ctx.send(embed=self.embed)

	@word.command(brief='Gives you example usages for a word.', aliases=['ex'])
	async def example(self, ctx, word):
		self.__fetch_word(word)
		self.output = [str(word['definitions'][0]['examples']) for i, word in enumerate(self.words)][:3]
		print(self.output)
		await ctx.send('\n'.join(self.output))

	@word.command(brief='Gives you a word\'s definition.', aliases=['d', 'def'])
	async def definition(self, ctx, word):
		self.__fetch_word(word)
		self.output = [str(word['definitions'][0]['text']) for i, word in enumerate(self.words)][:3]
		print(self.output)
		await ctx.send('\n'.join(self.output))
Example No. 18
class WiktionarySkill(MycroftSkill):
    def __init__(self):
        MycroftSkill.__init__(self)

    def initialize(self):
        self.register_entity_file("word.entity")
        self.parser = WiktionaryParser()

    @intent_file_handler('fallback.wiktionary.definition.intent')
    def handle_wiktionary_definition(self, message):
        #Get word to define from utterance
        word = message.data.get('word')
        #Lookup the word using Wiktionary
        get_word_info = self.parser.fetch(word)

        #Speak definition for requested word back to user
        try:
            # Get first definition from wiktionary response
            response = get_word_info[0]['definitions'][0]['text'][1]
            # Log the definition
            LOG.info(response)
            self.speak_dialog('fallback.wiktionary', {
                'word': word,
                'definition': response
            })
        except:
            self.speak_dialog('error.wiktionary')
def get_wiktionary_entry(language, word):
    """Interface to the requestion something from wiktionary.
    Arguments:
        language = language of which we want the entry
        word = word  of which we want the entry
    Returns:
        parsed wiktionary page
    """
    parser = WiktionaryParser()
    parser.set_default_language(language)
    try:
        return parser.fetch(word)
    except Exception as e:
        print("problem with word {}, language {}".format(word, language))
        print(e)
        return []
    def __init__(self,docpath,language='english'):

        super(wiktionaryGraph, self).__init__(docpath)

        self.parser = WiktionaryParser()
        self.parser.set_default_language(language)

        self.wiki_parse = defaultdict()
        self.pronounciations = defaultdict()
        self.definitions = defaultdict()
        self.etymologies = defaultdict()

        # still undecided whether the graph in general should have multiple links
        # or if it should be one link with multiple inputs.

        self.wikiGraph = nx.MultiGraph()
Example No. 21
    def _wiktionaryparser(self, word):
        results = WiktionaryParser().fetch(word)
        fieldsets = []

        for etym in results:
            if 'etymology' in etym:
                parts = []
                for defn in etym['definitions']:
                    parts.append('**{pos}.**\n-{defs}'.format(
                        pos=defn['partOfSpeech'],
                        defs='\n-'.join(defn['text'])))
                defns = "\n".join(parts)

                fieldsets.append([
                    {
                        'value': etym['etymology'],
                        'name': 'Etymology'
                    },
                    {
                        'value': defns,
                        'name': 'Definitions'
                    },
                    {
                        'value': '\n'.join(etym['pronunciations']['text']),
                        'name': 'Pronunciations'
                    },
                ])

        return fieldsets
Example No. 22
class WikLoader():
    root = Tk()
    root.geometry("200x150")
    pattern = r'/[^,>]+/'
    repattern = re.compile(pattern)

    parser = WiktionaryParser()
    parser.set_default_language('english')
    parser.include_relation('alternative forms')

    wordInput = 'lovely'

    words = parser.fetch(wordInput)
    origin = ""
    definitions = []
    poSpeechs = ""
    pronunciations = ""
    for word in words:
        origin += word['etymology']
        pronunciations = repattern.match(word['pronunciations']['text'][0])
        definitions = word['definitions']
        for definition in definitions:
            poSpeechs += ('\n' + definition['partOfSpeech'])

    print(origin)
    print(poSpeechs)
    print(pronunciations)
Example No. 23
def translate(word: str) -> str:
    """ connect to wiktionary, get all part of speech, join them into one string, and return here """
    global LINE_DIVIDER

    parser = WiktionaryParser()
    def_ = parser.fetch(word.lower())
    ret = ""
    for word_payload in def_:
        definitions = word_payload['definitions']

        translations = {
            d['partOfSpeech']: LINE_DIVIDER.join(d['text'])
            for d in definitions
        }
        ret += LINE_DIVIDER.join(f"{k}: {v}" for k, v in translations.items())

    return ret
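translate() reads a module-level LINE_DIVIDER; a quick sketch:

LINE_DIVIDER = '\n'      # the module-level separator the function expects
print(translate('Run'))  # one "partOfSpeech: definition lines" block per Wiktionary etymology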
Example No. 24
def fetch_word(word):
    parser = WiktionaryParser()
    each_word = word.lower()
    word_details = parser.fetch(each_word)
    if len(word_details) and len(word_details[0].get('definitions', [])):
        word_details = word_details[0]
        word_definitions = word_details.pop('definitions')
        priority = 0
        pronounciation_details = word_details.pop('pronunciations')

        audio_links = pronounciation_details.get('audio', [])
        pronounciations = pronounciation_details.get('text', [])
        new_word = Word.objects.create(word_english=each_word,
                                       pronounciations=pronounciations,
                                       audio_links=audio_links)
        translations = word_details.pop('translations')
        for each_translation in translations:
            meaning = each_translation.get('meaning')
            for language_code in each_translation.get(
                    'available_translations'):
                for each_local_word in each_translation.get(
                        'available_translations').get(language_code):
                    Translation.objects.create(
                        english_word=new_word,
                        meaning=meaning,
                        local_word=each_local_word,
                        utf_encoded=each_local_word.encode('utf-8'),
                        language=language_code)

        for each_definition in word_definitions:
            definition_text = each_definition.get('text', [])
            part_of_speech = each_definition.get('partOfSpeech')
            examples = each_definition.get('examples')
            synonyms = []
            for each_related in each_definition.get('relatedWords'):
                if each_related.get('relationshipType', '') == 'synonyms':
                    synonyms = each_related.get('words', [])
            new_word_definition = WordDefinition.objects.create(
                english_word=new_word,
                definitions=definition_text,
                priority=priority,
                part_of_speech=part_of_speech,
                examples=examples,
                synonyms=synonyms)
            priority += 1
def parseAndReturn(word):
    parser = WiktionaryParser()
    defList = parser.fetch(word)[0]["definitions"]
    defText = []
    if len(defList) == 0:
        raise AttributeError("CANNOT FIND A DEFINITION")
    for i in range(len(defList)):
        if i >= constants.MAX_NUM_DEFINITIONS:
            break
        defText.append([defList[i]["partOfSpeech"]])
        if debug:
            print(len(defList[i]["text"]))
        for j in range(1, len(defList[i]["text"])):
            if j > constants.MAX_DEPTH_PER_DEF:
                break
            defText[i].append(defList[i]["text"][j])

    return defText
Example No. 26
def wiktionary_retriever(word_list, lang):
    retrieved_words = dict()
    parser = WiktionaryParser()
    for word in word_list:
        retrieved_words[word] = parser.fetch(word, lang)

    wiktionary_dict = dict()
    for k, v in retrieved_words.items():
        try:
            if len(v) != 0:
                v = v[0]
                if len(v['definitions']) != 0:
                    gender, plural = "", ""
                    # Check the information in the text field
                    if lang == "italian":
                        text = v['definitions'][0]['text'][0].replace(
                            k, "").replace("\xa0", "").split(" (plural ")
                    else:
                        text = re.split(
                            r'.*plural ',
                            v['definitions'][0]['text'][0].replace(
                                k, "").replace("\xa0", ""))

                    if len(text) == 1:
                        gender = text[0]
                    elif len(text) >= 2:
                        gender = text[0]
                        if len(text[1].split(")")[0]) > 3:
                            plural = text[1].split(")")[0].replace(")", "")
                    else:
                        pass

                    wiktionary_dict[k] = (clean_text(v['etymology']),
                                          v['definitions'][0]['partOfSpeech'],
                                          gender, plural)
                else:
                    wiktionary_dict[k] = []
            else:
                wiktionary_dict[k] = []
        except:
            print("wiktionary err:", k)

    return wiktionary_dict
def init_wikparser():
    '''
    Initialize wiktionary parser
    '''
    parser = WiktionaryParser()
    RELATIONS = [
        "synonyms",
        "antonyms",
        "hypernyms",
        "hyponyms",
        "meronyms",
        "holonyms",
        "troponyms",
        "related terms",
        "coordinate terms",
    ]
    for rel in RELATIONS:
        parser.exclude_relation(rel)

    return parser
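Usage:

parser = init_wikparser()
entry = parser.fetch('cat')
# the excluded relation types should no longer appear under each definition's relatedWords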
Example No. 28
    def __init__(self, cache_directory: Path, from_language: Language):
        """
        Target language.

        :param cache_directory: directory for cache files
        :param from_language: target language
        """
        super().__init__()
        self.from_language: Language = from_language
        self.cache_directory: Path = cache_directory
        self.parser: WiktionaryParser = WiktionaryParser()

        self.add_obsolete: bool = False
def get_info(w, lang=None, parser=None, debug=False, postfix=None):
    try:
        if parser is None:
            parser = WiktionaryParser()
        info = parser.fetch(w, lang)[0]['definitions']
        pos_list, morph_list = [], []
        for info_dct in info:
            pos = info_dct['partOfSpeech']
            txt = info_dct['text']
            pos_list.append(pos)
            morph_list.append(wp_morph(txt, pos))
        result_dict = {
            'w': w,
            'src': lang[:2],
            'pos_wp': pos_list,
            'morph': morph_list
        }
        return {k + postfix: v for k, v in result_dict.items()} \
            if is_(postfix) else result_dict
    except:
        if debug:
            tb.print_exc()
        return {}
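get_info leans on two helpers from the original module, wp_morph and is_; when anything is missing the bare except simply returns {}. A sketch of the intended call:

info = get_info('perro', lang='spanish')
print(info.get('pos_wp'))   # e.g. ['noun'] on success, None when the lookup failed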
Example No. 30
class wiktionary(dictionaryAPIBaseClass):
    def __init__(self):
        self.parser = WiktionaryParser()

    def __del__(self):
        pass

    def word_definition(self, word):
        try:
            retrieved_definitions = self.parser.fetch(word)
        except Exception as e:
            print(f'An exception was raised when calling wiki API: {e}')
            exit(1)

        definition_list = retrieved_definitions[0]['definitions'][0]['text']
        # index 0 holds the headword line, so pick a random definition from index 1 onward
        definition = definition_list[random.randrange(1, len(definition_list))]
        return definition

    def word_check(self, word):

        if not self.parser.fetch(word)[0]['definitions']:
            return False
        else:
            return True
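Usage sketch, assuming dictionaryAPIBaseClass and the module's other imports (random, WiktionaryParser) come from the original project:

d = wiktionary()
if d.word_check('serendipity'):
    print(d.word_definition('serendipity'))   # a randomly chosen definition line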
Example No. 31
 def load(self):
     words = self.parent.get_words()
     info = {}
     url = "https://en.wiktionary.org/w/index.php?title={}&printable=yes"
     lang = "English" # get from the selector in the future
     parser = WiktionaryParser(lang)
     for w in words:
         r = requests.get(url.format(w), headers={}) 
         parser.feed(r.text)
         info[w] = str(parser.trans)
         parser.reset()
     self.parent.add_info('Answer', info)
Example No. 32
 def __init__(self, logger):
     WiktionaryParser.__init__(self, logger)
     self.logger = logger