Example #1
0
def yield_jpn_pron(request: requests_html,
                   config: "Config") -> "Iterator[Pron]":
    """Yield prons from the first transcription element on the page.

    For simplicity only the first transcription is used; words without
    any transcription yield nothing.
    """
    element = request.html.xpath(config.pron_xpath_selector, first=True)
    if not element:
        # No transcription found for this word.
        return
    yield from yield_pron(element, IPA_XPATH_SELECTOR, config)
Example #2
0
File: jpn.py — Project: anidhi/wikipron
def _yield_jpn_lower_pron(request: requests.Response, config: "Config",
                          word_target: str, heading: str) -> "Iterator[Pron]":
    """Yield, per matching element, the list of "lower" prons for
    *word_target*, extended with the first "upper" pron list if any.
    """
    selector = _PRON_XPATH_SELECTOR.format(
        word_to_work_from=word_target,
        heading=heading,
        second_ul="",
    )
    for element in request.html.xpath(selector):
        collected = list(yield_pron(element, IPA_XPATH_SELECTOR, config))
        # A possible — though seemingly unlikely — side effect of this
        # approach: it could match a second "upper" pronunciation (most
        # likely in a second Etymology) and append it to the "lower"
        # pronunciations from the first Etymology, or append the first
        # Etymology's "upper" pronunciation to the second Etymology's
        # "lower" one, etc. Fortunately, entries with multiple <ul>'s
        # AND multiple etymologies are exceedingly rare.
        first_upper = next(
            _yield_jpn_upper_pron(request, config, word_target, heading),
            None,
        )
        if first_upper is not None:
            collected += first_upper
        # Yield per element rather than collecting every pronunciation
        # on the page at once; collecting them all would make it hard to
        # connect each word to its own prons. We only want the prons
        # linked to the word being scraped.
        yield collected
Example #3
0
def _yield_latin_pron(request: requests.Response, config: "Config",
                      tag: str) -> "Iterator[Pron]":
    """Yield Latin prons, optionally restricted to the configured dialects."""
    # Language sections sit under <h2>; other tags under <h3>.
    heading = "h3" if tag != "Latin" else "h2"
    if not config.dialect:
        dialect_selector = (
            '[descendant::a[@title = "Appendix:Latin pronunciation"]]')
    else:
        # Build one XPath text() test per requested dialect.
        predicates = [f'text() = "{d.strip()}"'
                      for d in config.dialect.split("|")]
        dialect_selector = _PRON_WITH_DIALECT_XPATH_SELECTOR_TEMPLATE.format(
            dialects_text=" or ".join(predicates))
    selector = _PRON_XPATH_TEMPLATE.format(
        heading=heading, tag=tag, dialect_selector=dialect_selector)
    for element in request.html.xpath(selector):
        yield from yield_pron(element, IPA_XPATH_SELECTOR, config)
Example #4
0
File: jpn.py — Project: anidhi/wikipron
def _yield_jpn_upper_pron(request: requests.Response, config: "Config",
                          word_target: str, heading: str) -> "Iterator[Pron]":
    """Yield, per matching element, the list of "upper" prons for
    *word_target*.

    The ``second_ul`` predicate restricts the match to pron containers
    whose immediately preceding sibling is a <ul> — i.e. a second <ul>
    in the entry. Yields nothing when no such container exists.
    """
    pron_path = _PRON_XPATH_SELECTOR.format(
        word_to_work_from=word_target,
        heading=heading,
        second_ul="[preceding-sibling::*[1][self::ul]]",
    )
    # One xpath() call suffices: iterating an empty result list already
    # yields nothing. The previous version ran the same query twice,
    # probing result [0] and catching IndexError just to return early.
    for upper_pron_ele in request.html.xpath(pron_path):
        yield list(yield_pron(upper_pron_ele, IPA_XPATH_SELECTOR, config))
Example #5
0
def extract_word_pron_shan(word: "Word", request: requests_html,
                           config: "Config") -> "Iterator[WordPronPair]":
    """Pair the scraped word with each pronunciation found on the page."""
    for pron in yield_pron(request.html, _IPA_XPATH_SELECTOR, config):
        yield word, pron
Example #6
0
File: vie.py — Project: reubenraff/wikipron
def extract_pron(request: requests_html, selector: str,
                 config: "Config") -> "Iterator[Pron]":
    """Yield prons from every element matching *selector*."""
    matches = request.html.xpath(selector)
    for match in matches:
        yield from yield_pron(match, IPA_XPATH_SELECTOR, config)
Example #7
0
File: tha.py — Project: entn-at/wikipron
def extract_word_pron_thai(word: "Word", request: requests.Response,
                           config: "Config") -> "Iterator[WordPronPair]":
    """Pair the casefolded word with each pronunciation on the page."""
    folded = config.casefold(word)
    for pron in yield_pron(request.html, IPA_XPATH_SELECTOR, config):
        yield folded, pron
Example #8
0
def yield_cmn_pron(
    request: requests.Response, config: "Config"
) -> "Iterator[Pron]":
    """Yield Mandarin prons from each matching <li> container."""
    containers = request.html.xpath(_PRON_XPATH_TEMPLATE)
    for container in containers:
        yield from yield_pron(container, IPA_XPATH_SELECTOR, config)
Example #9
0
def _yield_latin_pron(request: requests.Response, config: "Config",
                      tag: str) -> "Iterator[Pron]":
    """Yield Latin prons found under the heading appropriate to *tag*."""
    # Language sections sit under <h2>; other tags under <h3>.
    if tag == "Latin":
        heading = "h2"
    else:
        heading = "h3"
    selector = _PRON_XPATH_TEMPLATE.format(heading=heading, tag=tag)
    for element in request.html.xpath(selector):
        yield from yield_pron(element, IPA_XPATH_SELECTOR, config)
Example #10
0
File: blt.py — Project: lfashby/wikipron
def extract_word_pron_blt(word: "Word", request: requests_html,
                          config: "Config") -> "Iterator[WordPronPair]":
    """Pair the scraped word with each pron matched by the
    language-specific selector.
    """
    selector = _PRON_XPATH_SELECTOR_TEMPLATE.format(language=config.language)
    for pron in yield_pron(request.html, selector, config):
        yield word, pron
Example #11
0
File: nan.py — Project: lfashby/wikipron
def yield_nan_pron(request: requests_html, selector: str,
                   config: "Config") -> "Iterator[Pron]":
    """Yield prons from each <li> container matching *selector*."""
    containers = request.html.xpath(selector)
    for container in containers:
        yield from yield_pron(container, IPA_XPATH_SELECTOR, config)