Пример #1
0
def transliterate(data, _from=None, _to=None, scheme_map=None, **kw):
    """Transliterate `data` from one scheme to another::

        output = transliterate('idam adbhutam', HK, DEVANAGARI)

    When no `scheme_map` is supplied, a new :class:`SchemeMap` is built on
    every call from the schemes named by `_from` and `_to`. To avoid that
    repeated work, build the map once and pass it in::

        scheme_map = SchemeMap(SCHEMES[HK], SCHEMES[DEVANAGARI])
        output = transliterate('idam adbhutam', scheme_map=scheme_map)

    :param data: the data to transliterate
    :param _from: the name of the source scheme (a key of ``SCHEMES``)
    :param _to: the name of the destination scheme (a key of ``SCHEMES``)
    :param scheme_map: a ready-made :class:`SchemeMap`. When given, `_from`
                       and `_to` are not consulted.
    :param kw: extra keyword arguments; accepted but not used here
    :return: whatever ``sanscript.transliterate`` returns for `data`
    """
    # Fast path: the caller already supplied a map.
    if scheme_map is not None:
        return sanscript.transliterate(data=data, scheme_map=scheme_map)

    source_scheme = SCHEMES[_from]
    target_scheme = SCHEMES[_to]
    derived_map = sanscript.SchemeMap(source_scheme, target_scheme)
    return sanscript.transliterate(data=data, scheme_map=derived_map)
Пример #2
0
def s_to_d(input_string):
    """Transliterate SLP1 text to Devanagari, reporting detected encodings.

    The encoding detected for the input and for the output is printed to
    stdout; detection is informational only and does not affect the
    conversion, which always uses the SLP1 -> Devanagari scheme map.

    :param input_string: the text to transliterate
    :return: the transliterated text
    """
    encoding_label = " ---   encoding = "

    detected_before = detect.detect(input_string)
    print("".join(("input: ", input_string, encoding_label,
                   str(detected_before))))

    slp1_to_devanagari = sanscript.SchemeMap(
        sanscript.SCHEMES[sanscript.SLP1],
        sanscript.SCHEMES[sanscript.DEVANAGARI],
    )
    converted = sanscript.transliterate(input_string,
                                        scheme_map=slp1_to_devanagari)

    detected_after = detect.detect(converted)
    print("".join(("output: ", converted, encoding_label,
                   str(detected_after))))
    return converted
Пример #3
0
def v_to_d(input_string):
    """Convert Velthuis-encoded text to Devanagari.

    The encoding of `input_string` is detected with ``detect.detect``. Only
    text detected as ``"Velthuis"`` is transliterated; any other input is
    returned unchanged.

    :param input_string: the text to convert
    :return: the Devanagari transliteration, or `input_string` itself when
             it is not detected as Velthuis
    """
    if str(detect.detect(input_string)) != "Velthuis":
        return input_string

    velthuis_to_devanagari = sanscript.SchemeMap(
        sanscript.SCHEMES[sanscript.VELTHUIS],
        sanscript.SCHEMES[sanscript.DEVANAGARI])
    return sanscript.transliterate(input_string,
                                   scheme_map=velthuis_to_devanagari)
Пример #4
0
def i_to_d(input_string):
    """Convert ITRANS- or HK-encoded text to Devanagari.

    The encoding of `input_string` is detected with ``detect.detect``. Text
    detected as ``"ITRANS"`` or ``"HK"`` is transliterated from that scheme;
    any other input is returned unchanged.

    :param input_string: the text to convert
    :return: the Devanagari transliteration, or `input_string` itself when
             it is detected as neither ITRANS nor HK
    """
    detected_name = str(detect.detect(input_string))

    if detected_name == "ITRANS":
        source_scheme = sanscript.ITRANS
    elif detected_name == "HK":
        source_scheme = sanscript.HK
    else:
        # Not a scheme this helper handles: pass the text through untouched.
        return input_string

    to_devanagari = sanscript.SchemeMap(
        sanscript.SCHEMES[source_scheme],
        sanscript.SCHEMES[sanscript.DEVANAGARI])
    return sanscript.transliterate(input_string, scheme_map=to_devanagari)
Пример #5
0
def d_to_i(input_string):
    """Convert Devanagari text to ITRANS.

    The encoding of `input_string` is detected with ``detect.detect``. Only
    text detected as ``"Devanagari"`` is transliterated; any other input is
    returned unchanged.

    :param input_string: the text to convert
    :return: the ITRANS transliteration, or `input_string` itself when it
             is not detected as Devanagari
    """
    if str(detect.detect(input_string)) != "Devanagari":
        return input_string

    devanagari_to_itrans = sanscript.SchemeMap(
        sanscript.SCHEMES[sanscript.DEVANAGARI],
        sanscript.SCHEMES[sanscript.ITRANS])
    return sanscript.transliterate(input_string,
                                   scheme_map=devanagari_to_itrans)
Пример #6
0
def main():
    """Run the pragrahya check on the first command-line argument.

    Reads ``sys.argv[1]``, prints it together with its detected encoding,
    and, when it is detected as Devanagari, transliterates it to Velthuis
    before handing it to ``pragrahya_check``. For any other encoding a hint
    is printed and ``pragrahya_check`` is still called, with an empty
    string. The check's result is printed at the end.
    """
    raw_text = sys.argv[1]
    print("input:" + raw_text)

    detected = detect.detect(raw_text)
    print("input: " + raw_text + " ---   encoding = " + str(detected))

    # Stays empty unless the argument turns out to be Devanagari.
    velthuis_text = ""
    if str(detected) == "Devanagari":
        devanagari_to_velthuis = sanscript.SchemeMap(
            sanscript.SCHEMES[sanscript.DEVANAGARI],
            sanscript.SCHEMES[sanscript.VELTHUIS])
        velthuis_text = sanscript.transliterate(
            raw_text, scheme_map=devanagari_to_velthuis)
    else:
        print("please enter the input in devanagari")

    verdict = pragrahya_check(velthuis_text)
    print("**Pragrahya " + str(verdict))
Пример #7
0
import time
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor
from functools import reduce

import requests
from lxml import etree
from indic_transliteration import sanscript
from requests import Session

# Base URL of the alphabetical listing pages; a letter is appended per page.
LISTINGS_BASE_URI = 'http://eoh.rkmathbangalore.org/listing/alphabet'
# Base URL for individual word pages (presumably a word id/slug is appended
# by code outside this section -- confirm against the caller).
WORD_PAGE_BASE_URI = 'http://eoh.rkmathbangalore.org/describe/word'
# NOTE(review): looks like the worker count for concurrent fetching -- confirm.
concurrency = 10

# One pre-built scheme map per target script, each converting from IAST:
# Devanagari, ITRANS, Tamil and Telugu, in that order.
HWS_XLITERATE_SCHEME_MAPS = [
    sanscript.SchemeMap(
        sanscript.SCHEMES[sanscript.IAST],
        sanscript.SCHEMES[target_script],
    )
    for target_script in (
        sanscript.DEVANAGARI,
        sanscript.ITRANS,
        sanscript.TAMIL,
        sanscript.TELUGU,
    )
]


def get_listing_pages():
    """Yield the response text of the listing page for each letter A-Z.

    One GET request is issued per uppercase ASCII letter against
    ``LISTINGS_BASE_URI/<letter>``. This is a generator, so each request is
    made only when the corresponding item is consumed.

    :return: an iterator over the ``.text`` of each response, in A-Z order
    """
    # Imported locally because the module's import block does not bring in
    # ``string``; without it the first iteration fails with a NameError.
    import string

    # we are getting all once for proper hyperlinking
    for alphabet in string.ascii_uppercase:
        yield requests.get('{}/{}'.format(LISTINGS_BASE_URI, alphabet)).text


def _get_class_matching_xpath(tag: str, clsname: str):
    return "//{tag}[contains(concat(' ', normalize-space(@class), ' '), ' {cls} ')]".format(
        tag=tag, cls=clsname)