Exemplo n.º 1
0
def download(connectionPool, sourceURL, targetFileName):
    """Fetch ``sourceURL`` with an HTTP GET and save the body to a file.

    Args:
        connectionPool: Object exposing ``request(method, url)`` whose result
            carries the response body in a ``data`` attribute (presumably a
            urllib3 pool manager -- confirm against callers).
        sourceURL: Address of the resource to download.
        targetFileName: Path of the file that receives the body. It is opened
            in binary write mode, so an existing file is overwritten.
    """
    # print_watcher is handed the *name* of the value to report, so the
    # string has to keep matching the parameter name.
    coutput.print_watcher("sourceURL")

    fileData = connectionPool.request('GET', sourceURL).data

    # The context manager closes the handle even when write() raises.
    with open(targetFileName, "wb") as targetFile:
        targetFile.write(fileData)
Exemplo n.º 2
0
    def parse_xml(self, root, word):
        """Yield one ``CollegiateDictionaryEntry`` per ``entry`` child of ``root``.

        Args:
            root: Parsed XML element; its direct ``entry`` children are read
                with ``findall``/``find``.
            word: The looked-up word, passed through unchanged to every
                ``CollegiateDictionaryEntry``.

        Yields:
            ``CollegiateDictionaryEntry(word, args)`` where ``args`` holds the
            headword, spelling (headword without ``*``), functional label,
            pronunciation, inflections, senses, and the sound and
            illustration fragments of the entry.

        Raises:
            AttributeError: If an entry has no ``hw`` child.
        """
        _FUNC_NAME_ = "CollegiateDictionary.parse_xml"
        for entry in root.findall('entry'):
            # Look the headword up once; it feeds both keys below.
            headword = entry.find('hw').text

            # The dict must stay named ``args``: the watcher calls below
            # receive expressions on it as text.
            args = {}
            args['headword'] = headword
            # Raw string: "\*" in a plain literal is an invalid escape sequence.
            args['spelling'] = re.sub(r"\*", "", headword)

            # getattr() on a missing (None) element falls back to the empty
            # string constant instead of raising.
            args['functional_label'] = getattr(entry.find('fl'), 'text', DICT_UNICODE_EMPTY_STR)
            args['pronunciation'] = getattr(entry.find('pr'), 'text', DICT_UNICODE_EMPTY_STR)
            coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, "args['pronunciation']")

            args['inflections'] = self._get_inflections(entry)
            args['senses'] = self._get_senses(entry)

            # Only elements with non-empty text contribute a fragment.
            args['sound_fragments'] = [e.text for e in
                                       entry.findall("sound/wav")
                                       if e.text]
            coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, "args['sound_fragments']")

            args['illustration_fragments'] = [e.text for e in
                                              entry.findall("art/bmp")
                                              if e.text]

            yield CollegiateDictionaryEntry(word, args)
Exemplo n.º 3
0
    def build_pronunciation(self, element, word_form):
        """Build a ``cdict.WordPronunciation`` from a ``<sound>`` element.

        Args:
            element: The ``<sound>`` element; its ``wav`` and ``wpr``
                descendants are collected with ``find_all``.
            word_form: Word form the pronunciation belongs to. It is stored
                as ``form`` and, with ``*`` removed, as ``spelling``.

        Returns:
            The populated pronunciation, or ``None`` when no ``wav`` element
            produced a non-empty audio URL.
        """
        # These two names are passed to print_watcher as text; keep them.
        wavElementText = cdict.DICT_UNICODE_EMPTY_STR
        wprElementText = cdict.DICT_UNICODE_EMPTY_STR

        for child in element.find_all(['wav', 'wpr']):
            childText = child.get_text().strip()

            if child.name == 'wav':
                # The wav text is turned into a full audio URL.
                wavElementText = self.build_audio_url(childText)
                coutput.print_watcher('wavElementText')

            elif child.name == 'wpr':
                wprElementText = childText
                coutput.print_watcher('wprElementText')

        # Without an audio URL there is nothing to build.
        hasAudio = wavElementText != cdict.DICT_UNICODE_EMPTY_STR
        if not hasAudio:
            return None

        pronunciation = cdict.WordPronunciation(wavElementText)
        if wprElementText != cdict.DICT_UNICODE_EMPTY_STR:
            pronunciation.word_pronunciation = wprElementText

        pronunciation.form = word_form
        pronunciation.spelling = word_form.replace('*', '')
        return pronunciation
Exemplo n.º 4
0
    def build_entry_url(self, key_word):
        """Return the dictionary entry URL for ``key_word``.

        The word is passed through ``coutput.normalize`` and substituted for
        the ``WORD`` field of ``self.entry_url_format``; every space in the
        resulting URL is then replaced with ``%20``.
        """
        _FUNC_NAME_ = "DictionaryConfig.build_entry_url"

        # The watcher receives expressions as text, so these strings must
        # keep referring to ``key_word`` by name.
        coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'key_word')
        coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_,
                              'coutput.normalize(key_word)')

        fill_template = self.entry_url_format.format
        normalized_word = coutput.normalize(key_word)
        entry_url = fill_template(WORD=normalized_word)
        return entry_url.replace(u" ", u"%20")
Exemplo n.º 5
0
    def build_entry_url(self, key_word):
        """Return the keyed dictionary entry URL for ``key_word``.

        ``self.entry_url_format`` is filled with the word (after
        ``coutput.normalize``) as ``WORD`` and ``self.api_key`` as ``KEY``;
        every space in the resulting URL is then replaced with ``%20``.
        """
        # The watcher receives expressions as text, so these strings must
        # keep referring to ``key_word`` by name.
        coutput.print_watcher('key_word')
        coutput.print_watcher('coutput.normalize(key_word)')

        fill_template = self.entry_url_format.format
        normalized_word = coutput.normalize(key_word)
        entry_url = fill_template(WORD=normalized_word, KEY=self.api_key)
        return entry_url.replace(" ", "%20")
Exemplo n.º 6
0
    def _get_pronunciations(self, root):
        """Return the flattened content of the direct ``pr`` child of ``root``.

        The ``pr`` element is flattened with ``self._flatten_tree`` (``it``
        elements excluded). An empty list is returned when ``root`` has no
        ``pr`` child.
        """
        _FUNC_NAME_ = "CollegiateDictionary._get_pronunciations"

        # ``pron_list`` is referenced by name in the watcher call below.
        pron_list = []

        pr_element = root.find("./pr")
        if pr_element is not None:
            pron_list.extend(self._flatten_tree(pr_element, exclude=['it']))

        coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'pron_list')
        return pron_list
Exemplo n.º 7
0
    def override_definitions(self, source, entry_word, overrides):
        """Put ``overrides`` in front of the definitions, dropping duplicates.

        Nothing happens when ``overrides`` is empty. Otherwise ``self.source``
        and ``self.entry_word`` are replaced, every existing definition whose
        normalized text equals the normalized text of an override is removed,
        and ``self.definitions`` becomes ``overrides + remaining definitions``.

        Normalization strips one leading character that is neither ``(`` nor
        alphanumeric (e.g. an application marker such as ``*``) and any
        trailing dots and spaces.

        Args:
            source: New value for ``self.source``.
            entry_word: New value for ``self.entry_word``.
            overrides: List of definition strings that take precedence.
        """
        _FUNC_NAME_ = "SimplifiedWordEntry.override_definitions"
        if len(overrides) == 0:
            return

        self.source = source
        self.entry_word = entry_word

        # A plain raw string (the Python-2-only ``ur`` prefix is a syntax
        # error on Python 3), compiled once for all comparisons below.
        marker_pattern = re.compile(r'(^[^\(a-zA-Z0-9]|[\. ]+$)',
                                    flags=re.IGNORECASE)

        # Remove duplicate definitions
        for override in overrides:

            # Handle overrides that are marked special by the application using a prefix e.g. *
            override_text = marker_pattern.sub(DICT_UNICODE_EMPTY_STR, override)
            coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'override')
            coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_,
                                  'override_text')

            # Build the surviving list instead of calling remove() on the
            # list being iterated: in-place removal makes the iterator skip
            # the element that follows each removed one.
            retained = []
            for definition in self.definitions:
                definition_text = marker_pattern.sub(DICT_UNICODE_EMPTY_STR,
                                                     definition)
                coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_,
                                      'definition')
                coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_,
                                      'definition_text')
                if definition_text == override_text:
                    coutput.print_debug(MOD_ERR_DEBUG, _FUNC_NAME_,
                                        "Removed duplicate definition")
                else:
                    retained.append(definition)
            self.definitions = retained

        # Override definitions
        self.definitions = overrides + self.definitions
Exemplo n.º 8
0
    def build_sound_url(self, fragment):
        """Return the media URL for the sound file name ``fragment``.

        The URL is ``<base>/<prefix>/<fragment>`` where the prefix is
        ``"number"`` when the fragment starts with a digit, ``"gg"`` or
        ``"bix"`` when it starts with one of those, and otherwise the first
        character of the fragment.
        """
        _FUNC_NAME_ = "build_sound_url"

        coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'fragment')

        if re.search(r'^([0-9]+)', fragment):
            prefix = "number"
        else:
            special_prefix = re.search(r'^(gg|bix)', fragment)
            if special_prefix:
                prefix = special_prefix.group(1)
            else:
                prefix = fragment[0]

        url_parts = ("http://media.merriam-webster.com/soundc11", prefix, fragment)
        return "{0}/{1}/{2}".format(*url_parts)
Exemplo n.º 9
0
    def _get_pronunciations(self, root):
        """Return the regular and 'alternative' pronunciations under ``root``.

        The direct ``pr`` and ``altpr`` children of ``root`` (in that order,
        each optional) are flattened with ``self._flatten_tree`` (``it``
        elements excluded). Every resulting string is returned with leading
        and trailing commas and spaces stripped.
        """
        _FUNC_NAME_ = "LearnersDictionary._get_pronunciations"

        pron_list = []
        for tag_path in ("./pr", "./altpr"):
            prons = root.find(tag_path)
            if prons is not None:
                pron_list.extend(self._flatten_tree(prons, exclude=['it']))

        # Watch the collected list; the call used to pass the unfilled
        # template placeholder '<ReplaceText>' instead of a real name.
        coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'pron_list')
        return [p.strip(', ') for p in pron_list]
Exemplo n.º 10
0
    def download_entry(self, connection_pool, key_word):
        """Download the dictionary entry for ``key_word`` as decoded text.

        Args:
            connection_pool: Object exposing ``request(method, url)``; the
                response must carry the body bytes in ``data``.
            key_word: Word whose entry URL is obtained from
                ``self.config.build_entry_url``.

        Returns:
            The response body decoded as UTF-8.
        """
        send_request = connection_pool.request
        entry_url = self.config.build_entry_url(key_word)

        # ``connectionResponse`` and ``entryData`` are referenced by name in
        # the watcher strings below and must keep these names.
        connectionResponse = send_request('GET', entry_url)

        coutput.print_watcher("key_word")
        coutput.print_watcher("self.config.build_entry_url(key_word)")
        coutput.print_watcher("connectionResponse")
        coutput.print_watcher("connectionResponse.data")

        # Perform unicode conversion
        entryData = connectionResponse.data.decode('utf8')
        coutput.print_watcher("entryData")

        return entryData
Exemplo n.º 11
0
def play_legacy(fileName, audioOutput, loopCount, loopDelaySec):
    """Play an audio file ``loopCount`` times through pygame's mixer.

    The audio output is switched with ``set_audio_output(audioOutput)``
    before playback and back to ``'auto'`` after the last repetition. Any
    ``Exception`` raised on the way is reported through ``coutput`` instead
    of being propagated.

    Args:
        fileName: Path of the audio file to play.
        audioOutput: Output selector handed to ``set_audio_output``.
        loopCount: Number of times the file is played.
        loopDelaySec: Pause in seconds between two repetitions.

    Reference:
        https://www.pygame.org/docs/ref/mixer.html#pygame.mixer.init
        http://techqa.info/programming/question/27745134/how-can-i-extract-the-metadata-and-bitrate-info-from-a-audio/video-file-in-python
    """
    try:
        # Enable for RaspberryPi
        coutput.print_debug("Executing set_audio_output")
        set_audio_output(audioOutput)

        coutput.print_debug("Executing mediainfo")
        # ``fileInfo`` is only consumed by the watcher calls below, which
        # reference it by name.
        fileInfo = mediainfo(fileName)

        coutput.print_watcher("fileName")
        coutput.print_watcher("fileInfo['sample_rate']")
        coutput.print_watcher("fileInfo['bits_per_sample']")
        coutput.print_watcher("fileInfo['channels']")

        for loopIndex in range(0, loopCount):
            # Syntax: init(frequency=22050, size=-16, channels=2, buffer=4096)
            pygame.mixer.init()

            coutput.print_debug("Executing pygame.mixer.music.load")
            pygame.mixer.music.load(fileName)

            coutput.print_debug("Executing pygame.mixer.music.play")
            pygame.mixer.music.play()
            # Poll at a short interval rather than spinning a core at 100%
            # for the whole duration of the clip.
            while pygame.mixer.music.get_busy():
                time.sleep(0.01)
            # introduce delay to ensure that the end of the audio is not
            # clipped during playback
            time.sleep(0.06)

            coutput.print_debug("Executing pygame.mixer.stop")
            pygame.mixer.stop()
            coutput.print_debug("Executing pygame.mixer.quit")
            pygame.mixer.quit()

            # No pause after the final repetition.
            if loopIndex != (loopCount - 1):
                time.sleep(loopDelaySec)

        set_audio_output('auto')
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C and SystemExit
        # still terminate the program.
        coutput.print_err("Unable to play audio from " + fileName)
        coutput.print_watcher("sys.exc_info()")
Exemplo n.º 12
0
def get_audio_output(audioOutput):
    """Return the ALSA output setting for ``audioOutput`` on this host.

    The setting is read from ``alsaAudioOutputConfig``, keyed first by
    ``platform.node()`` and then by the lower-cased ``audioOutput``.

    Raises:
        KeyError: If the host or the output name has no configuration entry.

    Reference:
        https://wiki.archlinux.org/index.php/Advanced_Linux_Sound_Architecture/Troubleshooting#HDMI_Output_does_not_work
    """
    coutput.print_watcher("alsaAudioOutputConfig")
    coutput.print_watcher("platform.node()")
    coutput.print_watcher("audioOutput.lower()")
    coutput.print_watcher(
        "alsaAudioOutputConfig[platform.node()][audioOutput.lower()]")

    hostOutputs = alsaAudioOutputConfig[platform.node()]
    return hostOutputs[audioOutput.lower()]
Exemplo n.º 13
0
def lookup_word(connectionPool, pronAudioOutput, pronLoopCount, pronLoopDelaySec, word, *lookupSource):
    """Look up ``word`` and display its dictionary entry or entries.

    Args:
        connectionPool: Connection object handed on to the dictionary
            source functions.
        pronAudioOutput, pronLoopCount, pronLoopDelaySec: Pronunciation
            playback settings handed on to ``display_dictionary_entry``.
        word: The word to look up.
        *lookupSource: Optional source selector; only the first value is
            used. Without it the entry comes from
            ``fetch_dictionary_entry``. ``'all'`` (case-insensitive) displays
            the entry of every source in ``PRIORITIZED_DICT_SOURCES``; a key
            of ``DICT_SOURCES`` displays that single source. Anything else
            prints an error message.
    """
    _FUNC_NAME_ = "lookup_word"

    # Lower-case the selector once; None means "no selector given".
    requestedSource = lookupSource[0].lower() if lookupSource else None

    if requestedSource is None:
        dictEntry = fetch_dictionary_entry(connectionPool, word)

        # dictEntry[0] is skipped here; the remaining fields are read by
        # position.
        currentDefinitions = dictEntry[1]
        source = dictEntry[2]
        currentClipWord = dictEntry[3]
        currentClipURL = dictEntry[4]
        pronSource = dictEntry[5]

        display_dictionary_entry(connectionPool, pronAudioOutput, pronLoopCount, pronLoopDelaySec, word, currentDefinitions, source, currentClipWord, currentClipURL, pronSource)

    elif requestedSource == 'all' or requestedSource in DICT_SOURCES:

        # ``dictSources`` and ``source`` are referenced by name in the
        # watcher strings below.
        if requestedSource == 'all':
            dictSources = list(PRIORITIZED_DICT_SOURCES)
        else:
            dictSources = [DICT_SOURCES[requestedSource]]

        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'dictSources')

        for dictSource in dictSources:
            source = dictSource.get_dictionary_source()
            pronSource = dictSource.get_dictionary_source()

            coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'source')

            dictEntryText = dictSource.get_dictionary_entry(connectionPool, word)
            currentDefinitions = dictSource.parse_word_definition(word, dictEntryText)
            currentClipWord, currentClipURL = dictSource.parse_word_clip(word, dictEntryText)

            display_dictionary_entry(connectionPool, pronAudioOutput, pronLoopCount, pronLoopDelaySec, word, currentDefinitions, source, currentClipWord, currentClipURL, pronSource)

    else:
        # print("") emits the same blank line on Python 2 and Python 3.
        print("")
        displayMessage = "ERROR: Unable to lookup {WORD}. Dictionary source {SOURCE} not supported".format(WORD=word, SOURCE=lookupSource[0])
        coutput.print_color(ERROR_TEXT_COLOR, displayMessage)

    print("")
Exemplo n.º 14
0
    def __init__(self, word, attrs):
        """Populate the entry from the ``attrs`` mapping.

        Args:
            word: The looked-up word this entry belongs to.
            attrs: Mapping read with ``get``. The keys used are ``headword``,
                ``spelling``, ``functional_label``, ``pronunciation``,
                ``inflections``, ``senses``, ``sound_fragments`` and
                ``illustration_fragments``; the two fragment values must be
                iterable.
        """
        _FUNC_NAME_ = "CollegiateDictionaryEntry.__init__"

        lookup = attrs.get

        self.word = word
        self.headword = lookup('headword')
        self.spelling = lookup('spelling')
        self.function = lookup('functional_label')

        self.pronunciation = lookup("pronunciation")
        coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, "self.pronunciation")

        self.inflections = lookup("inflections")
        self.senses = lookup("senses")

        # Fragments are file names; each is expanded into a full URL.
        sound_fragments = lookup("sound_fragments")
        self.audio = [self.build_sound_url(fragment)
                      for fragment in sound_fragments]

        illustration_fragments = lookup("illustration_fragments")
        self.illustrations = [self.build_illustration_url(fragment)
                              for fragment in illustration_fragments]
Exemplo n.º 15
0
def get_dictionary_entry(connectionPool, word):
    """Download the dictionary entry XML for ``word``.

    ``DICT_ENTRY_URL`` is filled with ``word`` and ``DICT_KEY``, spaces are
    replaced with ``%20`` and the URL is encoded as UTF-8 before the GET
    request is issued through ``connectionPool``.

    Returns:
        The response body, converted with ``unicode(..., 'utf-8')`` when it
        is a ``str`` and returned unchanged otherwise.
    """
    _FUNC_NAME_ = "get_dictionary_entry"

    # Download dictionary entry
    filledURL = DICT_ENTRY_URL.format(WORD=word, KEY=DICT_KEY)
    escapedURL = filledURL.replace(" ", "%20")
    # ``dictEntryURL`` and ``dictEntryResponse`` are referenced by name in
    # the watcher strings below.
    dictEntryURL = escapedURL.encode('utf-8')  # Handle URL strings in ascii
    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'dictEntryURL')

    dictEntryResponse = connectionPool.request('GET', dictEntryURL)
    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'dictEntryResponse.data')

    # Convert XML to unicode
    if not isinstance(dictEntryResponse.data, str):
        return dictEntryResponse.data
    return unicode(dictEntryResponse.data, 'utf-8')
Exemplo n.º 16
0
    def build_cross_entries(self, element, entry_word):
        """Build a ``cdict.WordInflection`` from a ``<cx>`` element.

        Args:
            element: The ``<cx>`` element; its ``cl`` and ``ct`` descendants
                are processed in the order ``find_all`` returns them.
            entry_word: Word the inflection is created for; its spelling is
                the same text with ``*`` removed.

        Returns:
            The inflection. Each ``cl`` text (stripped, with a trailing
            `` of`` removed) sets the functional label; each ``ct`` element
            contributes the senses built by ``self.build_senses``.
        """
        # ``wordInfl`` is referenced by name in the watcher call below.
        wordInfl = cdict.WordInflection(entry_word)
        wordInfl.spelling = entry_word.replace('*', '')

        for child in element.find_all(['cl', 'ct']):
            tagName = child.name

            if tagName == 'cl':
                labelText = child.get_text().strip()
                wordInfl.functional_label = re.sub(
                    r' of$', '', labelText, flags=re.UNICODE)
            elif tagName == 'ct':
                wordInfl.senses.extend(self.build_senses(child))

        coutput.print_watcher('wordInfl')
        return wordInfl
Exemplo n.º 17
0
def parse_word_definition(word, entryXML):
    """Extract the definitions of ``word`` from dictionary entry XML.

    Args:
        word: The word handed to ``CollegiateDictionary.lookup``.
        entryXML: Entry XML; a ``unicode`` value is encoded to UTF-8 before
            it is handed to the lookup.

    Returns:
        A list with one ``"(<function>) <definition>"`` string per sense of
        every entry, or an empty list when the lookup raises
        ``api.WordNotFoundException``.
    """
    _FUNC_NAME_ = "parse_word_definition"

    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'entryXML')

    # ``sourceXML`` and ``sense`` are referenced by name in watcher strings.
    sourceXML = entryXML.encode('utf-8') if isinstance(entryXML, unicode) else entryXML

    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'sourceXML')

    wordDefinition = []
    dictionary = api.CollegiateDictionary(DICT_KEY)

    try:
        for entry in dictionary.lookup(word, sourceXML):
            for sense in entry.senses:

                coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_,
                                      'sense.definition')

                strDefinition = unicode("({0}) {1}", 'utf-8').format(
                    entry.function, sense.definition)
                # Normalize a byte-string result to unicode before storing.
                if isinstance(strDefinition, str):
                    strDefinition = unicode(strDefinition, 'utf-8')
                wordDefinition.append(strDefinition)

    except api.WordNotFoundException:
        # Anything collected before the failure is discarded.
        return []

    return wordDefinition
Exemplo n.º 18
0
def play(fileName, audioOutput, loopCount, loopDelaySec):
    """Play an audio file ``loopCount`` times by running ffmpeg in a shell.

    Args:
        fileName: Path of the audio file. It is shell-quoted before being
            placed in the command line, so spaces and shell metacharacters
            in the path are passed to ffmpeg literally.
        audioOutput: Output selector resolved by ``get_audio_output``; the
            resolved text is inserted into the command unquoted.
        loopCount: Number of times the command is run.
        loopDelaySec: Pause in seconds between two repetitions.

    Reference:
        https://realpython.com/playing-and-recording-sound-python/#playing-audio-files
        https://askubuntu.com/questions/115369/how-to-play-mp3-files-from-the-command-line
        https://www.ffmpeg.org/ffplay.html
        https://www.ffmpeg.org/ffmpeg-devices.html#Examples-8
        Use aplay -L to find audio output device. e.g. HDMI is plughw
    """
    # Function-scope import keeps the block self-contained; ``pipes.quote``
    # is the equivalent on interpreters that lack ``shlex.quote``.
    try:
        from shlex import quote as shellQuote
    except ImportError:
        from pipes import quote as shellQuote

    # ``playCommand`` is referenced by name in a watcher string below.
    playCommand = "ffmpeg -f alsa {outputdevice} -loglevel quiet -i {filename} 2>/dev/null".format(
        outputdevice=get_audio_output(audioOutput),
        filename=shellQuote(fileName))

    try:

        coutput.print_watcher("fileName")

        for loopIndex in range(0, loopCount):
            coutput.print_debug("Executing play")
            coutput.print_watcher("playCommand")
            os.system(playCommand)

            # No pause after the final repetition.
            if loopIndex != (loopCount - 1):
                time.sleep(loopDelaySec)

    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C and SystemExit
        # still terminate the program.
        coutput.print_err("Unable to play audio from " + fileName)
        coutput.print_watcher("sys.exc_info()")
Exemplo n.º 19
0
    def compare_word_form(self, key_word, entry_word):
        """Warn when the pronounced entry is a different form of ``key_word``.

        Both words are tokenized with ``coutput.tokenize``. When the tokens
        differ a warning is printed, followed by a tip for the first rule in
        ``self.posRules`` whose ``regexPattern`` matches the key word token.
        """
        # The token names and ``posPattern`` are referenced by name in the
        # watcher strings below.
        keyWordToken = coutput.tokenize(key_word)
        entryWordToken = coutput.tokenize(entry_word)

        coutput.print_watcher('keyWordToken')
        coutput.print_watcher('entryWordToken')

        # Same form: nothing to report.
        if keyWordToken == entryWordToken:
            return

        coutput.print_warn(
            "A different form of the word is being pronounced.")

        for posPattern in self.posRules:
            coutput.print_watcher("posPattern['form']")

            if not posPattern['regexPattern'].match(keyWordToken):
                continue

            # Only the first matching rule produces a tip.
            tipText = "The {0} form ({1}) of the word is to be spelled.".format(
                posPattern['form'], posPattern['pattern'])
            coutput.print_tip(tipText)
            break
Exemplo n.º 20
0
    def compare_word_form(self, key_word, entry_word):
        """Warn when the pronounced entry is a different form of ``key_word``.

        Both words are tokenized with ``coutput.tokenize``. When the tokens
        differ a warning is printed, followed by a tip for the first rule in
        ``self.posRules`` whose ``regexPattern`` matches the key word token.
        """
        _FUNC_NAME_ = "DictionaryAssistant.compare_word_form"

        # The token names and ``posPattern`` are referenced by name in the
        # watcher strings below.
        keyWordToken = coutput.tokenize(key_word)
        entryWordToken = coutput.tokenize(entry_word)

        coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'keyWordToken')
        coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'entryWordToken')

        # Same form: nothing to report.
        if keyWordToken == entryWordToken:
            return

        coutput.print_warn(
            "A different form of the word is being pronounced.")

        for posPattern in self.posRules:
            coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_,
                                  "posPattern['form']")

            if not posPattern['regexPattern'].match(keyWordToken):
                continue

            # Only the first matching rule produces a tip.
            tipText = "The {0} form ({1}) of the word is to be spelled.".format(
                posPattern['form'], posPattern['pattern'])
            coutput.print_tip(tipText)
            break
Exemplo n.º 21
0
def fetch_dictionary_entry(connectionPool, word):
    """Collect definitions and a pronunciation clip from prioritized sources.

    The sources in ``PRIORITIZED_DICT_SOURCES`` are queried in order. The
    first one that yields definitions supplies the definitions, and the
    first one that yields a non-empty clip word supplies the pronunciation;
    the two may come from different sources. Querying stops once both have
    been found.

    Returns:
        ``[word, definitions, definition source, clip word, clip URL,
        pronunciation source]``; parts that were not found keep their empty
        defaults (``[]`` or ``""``).
    """
    _FUNC_NAME_ = "fetch_dictionary_entry"

    # ``dictSource``, ``wordDefinitionSource`` and ``pronunciationSource``
    # are referenced by name in the watcher strings below.
    wordDefinitions, wordDefinitionSource = [], ""
    pronunciationWord, pronunciationURL, pronunciationSource = "", "", ""
    haveDefinitions = False
    havePronunciation = False

    for dictSource in PRIORITIZED_DICT_SOURCES:
        dictEntryText = dictSource.get_dictionary_entry(connectionPool, word)

        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'dictSource')

        if not haveDefinitions:
            currentDefinitions = dictSource.parse_word_definition(word, dictEntryText)
            if len(currentDefinitions) > 0:
                wordDefinitionSource = dictSource.get_dictionary_source()
                wordDefinitions = currentDefinitions
                haveDefinitions = True

                coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'wordDefinitionSource')

        if not havePronunciation:
            currentClipWord, currentClipURL = dictSource.parse_word_clip(word, dictEntryText)
            if currentClipWord != "":
                pronunciationSource = dictSource.get_dictionary_source()
                pronunciationWord, pronunciationURL = currentClipWord, currentClipURL
                havePronunciation = True

                coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'pronunciationSource')

        # Both parts found: the remaining sources need not be queried.
        if haveDefinitions and havePronunciation:
            break

    return [word, wordDefinitions, wordDefinitionSource, pronunciationWord, pronunciationURL, pronunciationSource]
Exemplo n.º 22
0
    def set_simplified_word_entry(self):
        """Build ``self.simplified_word_entry`` from ``self.word_entries``.

        The entry (or entries) matching ``self.key_word`` are selected in
        three passes: exact entry-word match, then inflection-spelling match,
        then the first available entry. Pronunciation, respelling and the
        functional label come from the first selected entry (or from the
        matched inflection); etymologies and definitions are consolidated
        over all selected entries without duplicates. When
        ``self.word_entries`` yields nothing, a skeleton entry holding only
        the key word is stored.

        NOTE: many locals here are passed by name (as text) to
        ``coutput.print_watcher``; renaming them requires updating those
        strings as well.
        """

        simplifiedWordEntry = None

        matchEntries = []
        matchInflection = None
        matchType = "none"
        matchEntryFound = False

        # Identify matching entry

        # Pass #1: Find matching entry word
        # (every entry with an equal entry word is collected, not just the
        # first one)
        if not matchEntryFound:
            for we in self.word_entries:
                if self.key_word == we.entry_word:
                    matchEntries.append(we)
                    matchEntryFound = True
                    matchType = "entryword"

        # Pass #2: Find matching inflection
        # (stops at the first inflection whose spelling equals the key word;
        # only its owning entry is selected)
        if not matchEntryFound:
            for we in self.word_entries:
                for infl in we.inflections:
                    if self.key_word == infl.spelling:
                        matchEntries.append(we)
                        matchInflection = infl
                        matchEntryFound = True
                        matchType = "inflection"
                        break

                if matchEntryFound:
                    break

        # Pass #3: Default as first entry, if no match found
        # (the loop body runs at most once because of the break)
        if not matchEntryFound:
            for we in self.word_entries:
                matchEntries.append(we)
                matchEntryFound = True
                matchType = "default"
                break

        # Populate conformed entry attributes
        coutput.print_watcher('matchEntryFound')
        coutput.print_watcher('matchType')

        # If matching entry is found, populate pronunciation attributes
        if matchEntryFound:

            if matchType == "inflection":
                # The simplified entry is named after the inflection; its
                # label is chosen by coutput.coalesce from the inflection's
                # and the entry's labels (presumably the first usable one --
                # confirm against coutput).
                simplifiedWordEntry = SimplifiedWordEntry(
                    matchEntries[0].source, self.key_word,
                    matchInflection.spelling)
                simplifiedWordEntry.functional_label = coutput.coalesce(
                    matchInflection.functional_label,
                    matchEntries[0].functional_label)

                if matchInflection.pronunciation is not None:
                    # Rebuild the pronunciation field by field from the
                    # inflection's pronunciation.
                    coutput.print_watcher(
                        'matchInflection.pronunciation.audio_url')
                    simplifiedWordEntry.pronunciation = WordPronunciation(
                        matchInflection.pronunciation.audio_url)
                    simplifiedWordEntry.pronunciation.word_pronunciation = matchInflection.pronunciation.word_pronunciation
                    simplifiedWordEntry.pronunciation.form = matchInflection.pronunciation.form
                    simplifiedWordEntry.pronunciation.spelling = matchInflection.pronunciation.spelling

                    coutput.print_watcher(
                        'simplifiedWordEntry.pronunciation.word_pronunciation')
                    coutput.print_watcher(
                        'simplifiedWordEntry.pronunciation.form')
                    coutput.print_watcher(
                        'simplifiedWordEntry.pronunciation.spelling')

                if matchInflection.respelling is not None:
                    # Same field-by-field rebuild for the respelling.
                    simplifiedWordEntry.respelling = WordRespelling(
                        matchInflection.respelling.text,
                        matchInflection.respelling.source)
                    simplifiedWordEntry.respelling.form = matchInflection.respelling.form
                    simplifiedWordEntry.respelling.spelling = matchInflection.respelling.spelling

            else:
                # Entry-word or default match: everything comes from the
                # first selected entry.
                simplifiedWordEntry = SimplifiedWordEntry(
                    matchEntries[0].source, self.key_word,
                    matchEntries[0].entry_word)
                simplifiedWordEntry.functional_label = matchEntries[
                    0].functional_label

                coutput.print_watcher('matchEntries[0].pronunciation')
                # Deep copies keep the simplified entry independent of the
                # source entry's objects.
                simplifiedWordEntry.pronunciation = copy.deepcopy(
                    matchEntries[0].pronunciation)

                simplifiedWordEntry.respelling = copy.deepcopy(
                    matchEntries[0].respelling)

            # Consolidate etymology and senses (definitions and examples)
            etymologies = []
            definitions = []

            for we in matchEntries:

                coutput.print_watcher('we')

                # Keep each non-empty etymology once, in encounter order.
                if we.etymology != DICT_UNICODE_EMPTY_STR and we.etymology not in etymologies:
                    etymologies.append(we.etymology)

                # Definitions are prefixed with "(<functional label>) " when
                # the entry has a label.
                flText = DICT_UNICODE_EMPTY_STR
                if we.functional_label != DICT_UNICODE_EMPTY_STR:
                    flText = "({0}) ".format(we.functional_label)

                for sense in we.senses:
                    defnText = flText + str(sense.definition)
                    if defnText not in definitions:
                        definitions.append(defnText)

                # Handle inflections within matching entries
                for infl in we.inflections:

                    # Inflection senses use the inflection's own label as
                    # the prefix.
                    flText = DICT_UNICODE_EMPTY_STR
                    if infl.functional_label != DICT_UNICODE_EMPTY_STR:
                        flText = "({0}) ".format(infl.functional_label)

                    for sense in infl.senses:
                        defnText = flText + str(sense.definition)
                        if defnText not in definitions:
                            definitions.append(defnText)

            simplifiedWordEntry.etymology = "; ".join(et for et in etymologies)
            # Store a shallow copy of the consolidated list.
            simplifiedWordEntry.definitions = definitions[:]

        # Else if no matching entry is found, create a skeleton entry
        # (reached only when self.word_entries yields no entry at all, since
        # pass #3 accepts any first entry)
        else:
            simplifiedWordEntry = SimplifiedWordEntry(DICT_UNICODE_EMPTY_STR,
                                                      self.key_word,
                                                      DICT_UNICODE_EMPTY_STR)

        # Set conformed entry
        self.simplified_word_entry = simplifiedWordEntry

        coutput.print_watcher('simplifiedWordEntry')
Exemplo n.º 23
0
def parse_word_clip(word, entryXML):
    """Pick the audio clip that best matches ``word`` from dictionary XML.

    The XML is handed to ``api.CollegiateDictionary(DICT_KEY).lookup`` and
    searched in up to three passes; a later pass only runs while no audio
    clip has been found:

    1. the first entry whose spelling equals ``word`` and that has audio;
    2. the first inflection with a spelling equal to ``word`` (its first
       sound URL, when it has one);
    3. the first audio clip of the first entry that has any audio.

    Args:
        word: The word to search for.
        entryXML: Dictionary XML for the word; a ``unicode`` value is
            encoded to UTF-8 before the lookup.

    Returns:
        A list ``[audioClipWord, audioClip, audioClipPron]``. All three are
        ``DICT_UNICODE_EMPTY_STR`` when no clip is found or when the lookup
        raises ``api.WordNotFoundException``. ``audioClipPron`` is only
        populated by pass 1. ``str`` results are decoded as UTF-8.
    """
    # NOTE(review): print_watcher receives variable names as strings and
    # presumably resolves them in this scope, so the locals named in those
    # calls must keep their names.
    _FUNC_NAME_ = "parse_word_clip"
    searchWord = word

    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'entryXML')

    sourceXML = entryXML
    if isinstance(sourceXML, unicode):
        sourceXML = sourceXML.encode('utf-8')

    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'sourceXML')

    dictionary = api.CollegiateDictionary(DICT_KEY)

    wordFound = False
    audioClipFound = False
    audioClip = DICT_UNICODE_EMPTY_STR
    audioClipWord = DICT_UNICODE_EMPTY_STR
    audioClipPron = DICT_UNICODE_EMPTY_STR

    try:
        # Pass #1: Find matching headword spelling
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Start of Pass #1")

        # The lookup is repeated in every pass because its result is only
        # iterated once here and may be a one-shot iterator.
        entries = dictionary.lookup(searchWord, sourceXML)
        for entry in entries:

            coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'entry.spelling')

            if searchWord == entry.spelling:
                # Only the first audio clip of the matching entry is used;
                # a matching entry without audio does not end the search.
                for audio in entry.audio:
                    audioClipWord = entry.spelling
                    audioClipPron = entry.pronunciation
                    wordFound = True
                    audioClip = audio
                    audioClipFound = True
                    break
            if wordFound:
                break

        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "End of Pass #1")
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClipFound')
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'wordFound')

        # Pass #2: Find matching inflection
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Start of Pass #2")
        if not audioClipFound:
            wordFound = False
            audioClip = DICT_UNICODE_EMPTY_STR
            audioClipWord = DICT_UNICODE_EMPTY_STR
            audioClipPron = DICT_UNICODE_EMPTY_STR

            entries = dictionary.lookup(searchWord, sourceXML)
            for entry in entries:

                coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'entry.spelling')

                for inflection in entry.inflections:

                    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_,
                                          'inflection.spellings')

                    for spelling in inflection.spellings:

                        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_,
                                              'searchWord')
                        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_,
                                              'spelling')

                        if searchWord == spelling:
                            audioClipWord = spelling
                            wordFound = True

                            coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_,
                                                  'inflection.sound_urls')

                            # Take the first sound URL, if there is one.
                            for sound_url in inflection.sound_urls:

                                coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_,
                                                      'sound_url')

                                audioClip = sound_url
                                audioClipFound = True
                                break

                            break

                    # Stop at the first matching inflection. Without this
                    # check the remaining inflections of the entry were
                    # still visited and could overwrite the match.
                    if wordFound:
                        break

                if wordFound:
                    break

        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "End of Pass #2")
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClipFound')
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'wordFound')

        # Pass #3: Find pronunciation for first entry, if no match found
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Start of Pass #3")
        if not audioClipFound:
            # Pass 2 may have matched a word without audio; start clean.
            wordFound = False
            audioClip = DICT_UNICODE_EMPTY_STR
            audioClipWord = DICT_UNICODE_EMPTY_STR
            audioClipPron = DICT_UNICODE_EMPTY_STR

            entries = dictionary.lookup(searchWord, sourceXML)
            for entry in entries:
                for audio in entry.audio:
                    audioClipWord = entry.spelling
                    wordFound = True
                    audioClip = audio
                    audioClipFound = True
                    break
                if wordFound:
                    break

        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "End of Pass #3")
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClipFound')
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'wordFound')

    except api.WordNotFoundException:
        audioClip = DICT_UNICODE_EMPTY_STR
        audioClipWord = DICT_UNICODE_EMPTY_STR
        audioClipPron = DICT_UNICODE_EMPTY_STR

    # A word matched without audio (pass 2) is not reported.
    if not audioClipFound:
        audioClip = DICT_UNICODE_EMPTY_STR
        audioClipWord = DICT_UNICODE_EMPTY_STR
        audioClipPron = DICT_UNICODE_EMPTY_STR

    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'searchWord')
    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClipWord')
    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClip')

    # Return audioClipWord and audioClip, if found
    if isinstance(audioClipWord, str):
        audioClipWord = unicode(audioClipWord, 'utf-8')

    if isinstance(audioClip, str):
        audioClip = unicode(audioClip, 'utf-8')

    if isinstance(audioClipPron, str):
        audioClipPron = unicode(audioClipPron, 'utf-8')

    return [audioClipWord, audioClip, audioClipPron]
Exemplo n.º 24
0
    def override_entry(self, source, entry_word, overrides):
        """Apply override lines to this entry.

        Lines starting with ``#!`` are info lines of the form
        ``#!Name: value``; every other non-empty line is an override
        definition. Info lines with an empty value are ignored.

        Info lines are applied as follows:

        * ``#!Etymology`` replaces ``self.etymology``.
        * ``#!AudioURL`` sets the pronunciation audio URL, creating a
          ``WordPronunciation`` when none exists.
        * ``#!Respelling`` sets the respelling text, creating a
          ``WordRespelling`` when none exists, and records ``source``.
        * ``#!Word``, when present, is copied to ``form`` and ``spelling``
          of the pronunciation/respelling touched above.
        * ``#!Sentence`` is prepended to ``self.usage``.
        * ``#!Examples`` replaces ``self.examples``.
        * Any other info line is appended to ``self.definitions`` as
          ``"name: value"``.

        ``self.source`` receives the ``#!Source`` value (or ``source`` when
        there is none), appended with ``;`` if it is already set. Existing
        definitions that equal an override definition after normalisation
        are dropped, and the override definitions are placed in front.

        Args:
            source: Name of the source supplying the overrides.
            entry_word: Value assigned to ``self.entry_word``.
            overrides: Sequence of override lines; nothing happens when it
                is empty.
        """
        if not overrides:
            return

        self.entry_word = entry_word

        overrideInfo = {}
        overrideDefinitions = []
        for override in overrides:
            if override.startswith('#!'):
                override_name = override.split(':')[0].strip()
                override_value = re.sub('^#![a-zA-Z0-9]+: ',
                                        DICT_UNICODE_EMPTY_STR,
                                        override).strip()

                if override_value != DICT_UNICODE_EMPTY_STR:
                    overrideInfo[override_name] = override_value
            elif override != DICT_UNICODE_EMPTY_STR:
                overrideDefinitions.append(override)

        # Process info lines
        # NOTE(review): '#!Word' and '#!Source' have no branch of their own
        # and therefore also reach the final else below, which adds them to
        # the definitions as "name: value" -- confirm this is intended.
        for key, value in overrideInfo.items():
            if key == '#!Etymology':
                self.etymology = value

            elif key == '#!AudioURL':
                if self.pronunciation is None:
                    self.pronunciation = WordPronunciation(value)
                else:
                    self.pronunciation.audio_url = value

                if '#!Word' in overrideInfo:
                    self.pronunciation.form = overrideInfo['#!Word']
                    self.pronunciation.spelling = overrideInfo['#!Word']

            elif key == '#!Respelling':
                # The caller-supplied source is recorded here, not the
                # '#!Source' info line.
                if self.respelling is None:
                    self.respelling = WordRespelling(value, source)
                else:
                    self.respelling.source = self.respelling.source + ';' + source
                    self.respelling.text = value

                if '#!Word' in overrideInfo:
                    self.respelling.form = overrideInfo['#!Word']
                    self.respelling.spelling = overrideInfo['#!Word']

            elif key == '#!Sentence':
                self.usage = [value] + self.usage

            elif key == '#!Examples':
                self.examples = value

            else:
                self.definitions.append("{}: {}".format(key, value))

        # Process #!Source info lines
        altSource = overrideInfo.get('#!Source', source)

        if self.source == DICT_UNICODE_EMPTY_STR:
            self.source = altSource
        else:
            self.source = self.source + ';' + altSource

        # Process definitions
        # Remove duplicate definitions
        # Definitions are compared after stripping one leading marker
        # character (e.g. an application prefix such as *) and any trailing
        # dots/spaces.
        edgeNoise = re.compile(r'(^[^\(a-zA-Z0-9]|[\. ]+$)',
                               flags=re.IGNORECASE)

        for override in overrideDefinitions:
            override_text = edgeNoise.sub(DICT_UNICODE_EMPTY_STR, override)
            coutput.print_watcher('override')
            coutput.print_watcher('override_text')

            # Build a filtered list rather than removing from the list being
            # iterated; in-place removal skips the element that follows each
            # removed one and so can leave duplicates behind.
            keptDefinitions = []
            for definition in self.definitions:
                definition_text = edgeNoise.sub(DICT_UNICODE_EMPTY_STR,
                                                definition)
                coutput.print_watcher('definition')
                coutput.print_watcher('definition_text')
                if definition_text == override_text:
                    coutput.print_debug("Removed duplicate definition")
                else:
                    keptDefinitions.append(definition)
            self.definitions = keptDefinitions

        # Override definitions
        self.definitions = overrideDefinitions + self.definitions
Exemplo n.º 25
0
    def set_word_entries(self):
        """Build word entries from ``self.entry_raw_text``.

        The raw text is parsed with BeautifulSoup using
        ``self.config.parser``. For every <entry> element a
        ``cdict.WordEntry`` is created from its <ew> element (the last one
        if there are several), filled from the <hw>, <fl>, <et>, <pr>,
        <sound>, <art>, <def> and <cx> elements and appended to
        ``self.word_entries``.

        Inflections <in>, run-on entries <dro>/<uro> and variants <vr> are
        detached from the entry before the main pass and attached afterwards
        as ``cdict.WordInflection`` objects, one per detached element.

        An <entry> without <ew> is skipped, as is a detached element that
        has no <if>, <ure>, <drp> or <va> form; previously both cases either
        raised NameError or re-used the object left over from the previous
        iteration.
        """
        soup = BeautifulSoup(self.entry_raw_text, self.config.parser)
        nameFilter = re.compile(r'(hw|fl|pr|et|sound|def|cx|art)')

        for entry in soup.find_all('entry'):
            # DTD of an entry, for reference:
            #
            # <!ELEMENT entry
            #   (((subj?, art?, formula?, table?),
            #         hw,
            #         (pr?, pr_alt?, pr_ipa?, pr_wod?, sound?)*,
            #         (ahw, (pr, pr_alt?, pr_ipa?, pr_wod?, sound?)?)*,
            #         vr?),
            #      (fl?, in*, lb*, ((cx, (ss | us)*) | et)*, sl*),
            #      (dx | def)*,
            #      (list? |
            #        (uro*, dro*, ((pl, pt, sa?) |
            #                       (note) |
            #                       quote+)*)))>

            # Capture and exclude miscellaneous entries from main entry:
            # * inflections <in>
            # * defined run-on entries <dro>
            # * undefined run-on entries <uro>
            # * variants <vr>
            miscElements = entry.find_all(['in', 'dro', 'uro', 'vr'])
            for detached in miscElements:
                detached.extract()

            coutput.print_debug("Process all <ew> elements")
            wordEntry = None
            for element in entry.find_all('ew'):

                elementText = element.get_text().strip()
                wordEntry = cdict.WordEntry(self.config.name, elementText)

            if wordEntry is None:
                # Nothing to attach the remaining elements to.
                coutput.print_debug("Skip <entry> without <ew> element")
                continue

            for element in entry.find_all(nameFilter):

                elementText = element.get_text().strip()
                coutput.print_watcher('element.name')
                coutput.print_watcher('elementText')

                if element.name == 'hw':
                    coutput.print_debug("Process <hw> element")
                    wordEntry.head_word = elementText

                elif element.name == 'fl':
                    coutput.print_debug("Process <fl> element")
                    wordEntry.functional_label = elementText

                elif element.name == 'et':
                    coutput.print_debug("Process <et> element")
                    wordEntry.etymology = elementText

                elif element.name == 'pr':
                    wordEntry.respelling = self.build_respelling(
                        element, wordEntry.entry_word)

                elif element.name == 'sound':
                    wordEntry.pronunciation = self.build_pronunciation(
                        element, wordEntry.entry_word)

                elif element.name == 'art':
                    wordEntry.illustrations.extend(
                        self.build_illustrations(element,
                                                 wordEntry.entry_word))

                elif element.name == 'def':
                    coutput.print_debug("Process <def> element")
                    wordEntry.senses.extend(self.build_senses(element))

                elif element.name == 'cx':
                    # Process cross-entry <cx> elements as inflections
                    wordEntry.inflections.append(
                        self.build_cross_entries(element,
                                                 wordEntry.entry_word))

            # Process previously captured misc. elements from main entry as inflections
            for miscElement in miscElements:

                # Reset per element so a stale inflection is never re-used.
                winf = None

                # NOTE(review): with several form elements in one misc
                # element only the last one is kept -- confirm intended.
                for element in miscElement.find_all(['if', 'ure', 'drp',
                                                     'va']):

                    elementText = element.get_text().strip()
                    winf = cdict.WordInflection(elementText)
                    winf.spelling = elementText.replace('*', '')

                    if element.name == 'ure':
                        winf.functional_label = "undefined run-on entry"
                    elif element.name == 'drp':
                        winf.functional_label = "defined run-on phrase"
                    elif element.name == 'va':
                        winf.functional_label = "variant form"

                if winf is None:
                    # No form element, so there is no inflection to describe.
                    continue

                for element in miscElement.find_all(
                    ['il', 'sound', 'pr', 'def']):

                    coutput.print_debug("{0} :: {1}".format(
                        "element.name", element.name))

                    elementText = element.get_text().strip()
                    coutput.print_debug("{0} :: {1}".format(
                        "elementText", elementText))

                    if element.name == 'il':
                        winf.functional_label = elementText

                    elif element.name == 'pr':
                        winf.respelling = self.build_respelling(
                            element, winf.form)

                    elif element.name == 'sound':
                        winf.pronunciation = self.build_pronunciation(
                            element, winf.form)

                    elif element.name == 'def':
                        winf.senses.extend(self.build_senses(element))

                wordEntry.inflections.append(winf)

            coutput.print_watcher('wordEntry')
            self.word_entries.append(wordEntry)
Exemplo n.º 26
0
# Main Program
################################################################

# Label passed to the coutput debug helpers for this script section.
_FUNC_NAME_ = "main"

# Dictionary configuration and the assistant used to download entries.
dictConfig = cdictapi.DictionaryConfig()
dictAssist = cdictassist.DictionaryAssistant(dictConfig)

# Shared urllib3 pool manager; SDO_USER_AGENT is sent as default headers.
connectionPool = urllib3.PoolManager(10, headers=SDO_USER_AGENT)

# One log entry per line of the list file.
logEntries = cfile.read(SDO_LIST_FILE).splitlines()

print "Downloading overrides ..."

for entry in logEntries:
    coutput.print_watcher(SDO_ERR_DEBUG, _FUNC_NAME_, 'entry')

    # Log entries are colon-separated; the word is the second field.
    # NOTE(review): an entry without ':' raises IndexError below.
    logValues = entry.split(':')

    word = logValues[1]

    # Download the entry only when no override entry file exists for the
    # word yet; the entry is then always read back from that file.
    if not os.path.isfile(SDO_OVERRIDE_ENTRY_FILE.format(WORD=word)):
        cfile.write(SDO_OVERRIDE_ENTRY_FILE.format(WORD=word),
                    dictAssist.download_entry(connectionPool, word))

    wordEntry = cfile.read(SDO_OVERRIDE_ENTRY_FILE.format(WORD=word))
    wordDictionary = cdictapi.DictionaryEntry(dictConfig, word, wordEntry)
    coutput.print_watcher(SDO_ERR_DEBUG, _FUNC_NAME_, 'wordEntry')

    # Error flags start cleared for every entry.
    SDO_ERR_DEFN_MISSING = False
    SDO_ERR_CLIP_MISSING = False
SB_ERR_DEBUG = False

################################################################
# Main Program
################################################################

_FUNC_NAME_ = "main"

connectionPool = urllib3.PoolManager(10, headers=SDO_USER_AGENT)

logEntries = cfile.read(SDO_LIST_FILE).splitlines()

print "Downloading overrides ..."

for entry in logEntries:
    coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'entry')

    logValues = entry.split(':')
    
    word = logValues[1]
    wordEntry = cdict.fetch_dictionary_entry(connectionPool, word)
    coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'wordEntry')

    SDO_ERR_DEFN_MISSING = False
    SDO_ERR_CLIP_MISSING = False
    
    print unicode("Word: {0}\t{1}", 'utf-8').format(word, logValues[2])

    if SDO_ERR_DEFN_REGEX_PATTERN.match(logValues[2]):
        coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'wordEntry[1]')
        if len(wordEntry[1]) > 0:
Exemplo n.º 28
0
# Every row of the module table describes one set.
setRowElements = moduleTableElement.find_elements_by_xpath("//table/tbody/tr")

setEntries = []
processFlag = False
setCounter = 0

# setCounter is the 1-based row position and is used as the set ID prefix.
for setCounter, setRowElement in enumerate(setRowElements, 1):
    setColElement = setRowElement.find_element_by_xpath(".//td")
    setName = setColElement.text.strip().lower().replace(" ", "-")
    setID = "{:03d}-".format(setCounter) + setName
    setLinkElement = setRowElement.find_element_by_xpath(
        ".//a[contains(@href,'&bt=r') and not(contains(@href,'_test_'))]")
    setURL = setLinkElement.get_property("href")

    print("Checking set {}.".format(setName))
    coutput.print_watcher("setID")
    coutput.print_watcher("setURL")

    if len(APP_SELECT_SET_LIST) > 0:
        # An explicit selection list decides on its own for every row.
        processFlag = setName in APP_SELECT_SET_LIST
    else:
        # Range mode: processFlag carries over between rows. It is switched
        # on when no start set is configured or the start set is reached,
        # and switched off again at the stop set.
        if APP_START_SET_NM == APP_EMPTY_STRING or setName == APP_START_SET_NM:
            processFlag = True

        if setName == APP_STOP_SET_NM:
            processFlag = False
Exemplo n.º 29
0
                            currOrigin = currOrigin + u'; ' + elementText

                    elif u'ety-sl' in element['class']:
                        elementText = element.get_text().strip()

                        if currOrigin == cdict.DICT_UNICODE_EMPTY_STR:
                            currOrigin = elementText
                        else:
                            currOrigin = currOrigin + elementText

        for entry in soup.find_all(self.config.is_required_element):

            if entry.name == u'div' and entry.has_attr(u'class') and any(
                    re.compile(ur'entry-.*').match(x)
                    for x in entry.attrs[u'class']):
                coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'entry')

                # Process head word: <h1 class="hword"> or <p class="hword">
                for element in entry.find_all(class_="hword"):
                    elementText = element.get_text().strip()
                    currEntryWord = elementText
                    #coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'currEntryWord')

                # Process functional label: <span class="fl">
                for element in entry.find_all('span', class_="fl"):
                    elementText = element.get_text().strip()
                    elementText = re.sub(ur'[ ]*\(.*$',
                                         u'',
                                         elementText,
                                         flags=re.UNICODE)
                    currFuncLabel = elementText