Пример #1
0
def indexes_generator(indexes_lang):
    """
    Factory that builds the index renderer for the glossary language.

    :param indexes_lang: str, key into ``indexes.languages`` selecting an
        extended indexer; a falsy value disables extended indexes.
    :return: function ``generate_indexes(title, alts, content, BeautifulSoup)``
    :raises ValueError: if ``indexes_lang`` is given but no indexer is
        registered under that name.
    """
    indexer = None  # Callable[[Sequence[str], str], Sequence[str]]
    if indexes_lang:
        # imported lazily: only needed when extended indexes are requested
        from . import indexes as idxs
        indexer = idxs.languages.get(indexes_lang, None)
        if not indexer:
            msg = "extended indexes not supported for the specified language: %s.\n"\
                  "following languages avaible: %s." %\
                  (indexes_lang, ', '.join(idxs.languages))
            log.error(msg)
            raise ValueError(msg)

    def generate_indexes(title, alts, content, BeautifulSoup):
        """
        Render the ``<d:index>`` elements of one entry as a single string.

        :param title: str, main title of the entry
        :param alts: iterable of str, alternative titles
        :param content: entry content, passed through to the extended
            indexer when one is configured
        :param BeautifulSoup: object exposing ``dammit.EntitySubstitution``
            (presumably the bs4 module), or a falsy value to use the
            built-in fallback quoting
        :return: str, concatenated ``<d:index .../>`` elements
        """
        indexes = [title]
        indexes.extend(alts)

        if BeautifulSoup:
            quoted_title = BeautifulSoup.dammit.EntitySubstitution.substitute_xml(
                title, True)
        else:
            # NOTE(review): the fallback escapes only '>' and '"'; '&' and
            # '<' pass through unchanged -- confirm titles never contain them.
            quoted_title = '"%s"' % title.replace('>', '&gt;').replace(
                '"', "&quot;")

        if indexer:
            indexes = set(indexer(indexes, content))

        normal_indexes = set()
        for idx in indexes:
            normal = _normalize.title(idx, BeautifulSoup)
            normal_indexes.add(_normalize.title_long(normal))
            normal_indexes.add(_normalize.title_short(normal))
        # the title itself is always emitted first, don't repeat it
        normal_indexes.discard(title)

        # skip empty titles.  everything could happen.
        normal_indexes = [s for s in normal_indexes if s.strip()]

        s = '<d:index d:value=%s d:title=%s/>' % (quoted_title, quoted_title)
        if BeautifulSoup:
            for idx in normal_indexes:
                s += '<d:index d:value=%s d:title=%s/>' % (
                    BeautifulSoup.dammit.EntitySubstitution.substitute_xml(
                        idx, True), quoted_title)
        else:
            for idx in normal_indexes:
                s += '<d:index d:value="%s" d:title=%s/>' % (idx.replace(
                    '>', '&gt;').replace('"', "&quot;"), quoted_title)
        return s

    return generate_indexes
Пример #2
0
def indexes_generator(indexes_lang):
	"""
	Create the index renderer matching the glossary language.

	:param indexes_lang: str, name of a registered extended indexer;
		falsy to emit only title / alternative-title indexes
	:return: function ``generate_indexes(title, alts, content, BeautifulSoup)``
	:raises ValueError: when no indexer is registered for ``indexes_lang``
	"""
	indexer = None  # Callable[[Sequence[str], str], Sequence[str]]
	if indexes_lang:
		from . import indexes as idxs
		indexer = idxs.languages.get(indexes_lang)
		if not indexer:
			known_languages = ", ".join(idxs.languages)
			msg = (
				"extended indexes not supported for the specified language: %s.\n"
				"following languages avaible: %s."
			) % (indexes_lang, known_languages)
			log.error(msg)
			raise ValueError(msg)

	def generate_indexes(title, alts, content, BeautifulSoup):
		"""
		Build the ``<d:index>`` markup for one entry.

		:param title: str, entry title
		:param alts: iterable of str, alternative titles
		:param content: entry content, handed to the extended indexer if any
		:param BeautifulSoup: object exposing ``dammit.EntitySubstitution``
			(presumably the bs4 module), or falsy for the fallback quoting
		:return: str with all ``<d:index .../>`` elements concatenated
		"""
		def quote_attr(value):
			# quoted attribute value: via BeautifulSoup when available,
			# otherwise a minimal replacement of '>' and '"'
			if BeautifulSoup:
				return BeautifulSoup.dammit.EntitySubstitution.substitute_xml(value, True)
			return '"%s"' % value.replace(">", "&gt;").replace('"', "&quot;")

		candidates = [title] + list(alts)
		quoted_title = quote_attr(title)

		if indexer:
			candidates = set(indexer(candidates, content))

		variants = set()
		for candidate in candidates:
			base = _normalize.title(candidate, BeautifulSoup)
			variants.add(_normalize.title_long(base))
			variants.add(_normalize.title_short(base))
		# the title gets its own element below
		variants.discard(title)

		# skip empty titles.  everything could happen.
		kept = [variant for variant in variants if variant.strip()]

		entry = "<d:index d:value=%s d:title=%s/>"
		parts = [entry % (quoted_title, quoted_title)]
		parts.extend(entry % (quote_attr(variant), quoted_title) for variant in kept)
		return "".join(parts)
	return generate_indexes
Пример #3
0
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
"""
Russian indexes based on pymorphy.
"""

from . import languages
from pyglossary.plugins.formats_common import log

# pymorphy2 is required by this module: explain how to get it, then let the
# ImportError propagate to the importer.
try:
    import pymorphy2
except ImportError:
    log.error(
        "module pymorphy2 is required to build extended Russian indexes.\n"
        "You can download it here: http://pymorphy2.readthedocs.org/en/latest/.\n"
        "Or by running: sudo pip3 install pymorphy2"
    )
    raise
else:
    # module-level analyzer, created once at import time
    morphy = pymorphy2.MorphAnalyzer()


def ru(titles, _):
    """
	gives a set of all declensions, cases and other forms of word `title`.
	note that it works only if title is one word.

	:type titles: Sequence[str]
	:rtype: Set[str]
	"""
    indexes = set()
Пример #4
0
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
"""
Russian indexes based on pymorphy.
"""

from . import languages
from pyglossary.plugins.formats_common import log

# pymorphy2 is required by this module: explain how to get it, then let the
# ImportError propagate to the importer.
try:
    import pymorphy2
except ImportError:
    log.error(
        "module pymorphy2 is required to build extended russian indexes.  "
        "you can download it here: http://pymorphy2.readthedocs.org/en/latest/.  "
        "or run `pip3 install pymorphy2`.\n"
    )
    raise

# reached only when the import succeeded (the handler above re-raises)
morphy = pymorphy2.MorphAnalyzer()


def ru(titles, _):
    """
	gives a set of all declensions, cases and other forms of word `title`.
	note that it works only if title is one word.

	:type titles: Sequence[str]
	:rtype: Set[str]
	"""
Пример #5
0
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
"""
Russian indexes based on pymorphy.
"""

from . import languages
from pyglossary.plugins.formats_common import log

# pymorphy2 is required by this module: explain how to get it, then let the
# ImportError propagate to the importer.
try:
	import pymorphy2
except ImportError:
	log.error(
		"module pymorphy2 is required to build extended russian indexes.  "
		"you can download it here: http://pymorphy2.readthedocs.org/en/latest/.  "
		"or run `pip3 install pymorphy2`.\n"
	)
	raise

# reached only when the import succeeded (the handler above re-raises)
morphy = pymorphy2.MorphAnalyzer()


def ru(titles, _):
	"""
	gives a set of all declensions, cases and other forms of word `title`.
	note that it works only if title is one word.

	:type titles: Sequence[str]
	:rtype: Set[str]
	"""
	indexes = set()
Пример #6
0
# names exported by a star-import of this package
__all__ = ['languages', 'log']

# registry mapping language name -> indexer function; it starts empty and is
# filled in by the submodules (contract described in the string below)
languages = {}
"""
Dict[str, Callable[[Sequence[str], str], Sequence[str]]]

submodules must register languages by adding (language name -> function)
pairs to the mapping.

function must follow the signature below:
    :param titles: flat iterable of title and alternative titles
    :param content: cleaned entry content
    :return: iterable of indexes (str).

use
```
    from . import languages
    # or
    from appledict.indexes import languages
```
"""

# absolute path of the directory that contains this file
here = os.path.dirname(os.path.abspath(__file__))

# Import every module found in that directory as a submodule of this
# package, so that each one gets a chance to register itself in `languages`.
for _, module, _ in pkgutil.iter_modules([here]):
    try:
        __import__('%s.%s' % (__name__, module))
    except ImportError:
        # a plugin that cannot be imported is logged with its traceback and
        # skipped; only ImportError is handled, anything else propagates
        log.error("error while importing indexes plugin %s" % module,
                  exc_info=1)
Пример #7
0
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
"""
Chinese wildcard and pinyin indexes.
"""

from pyglossary.plugins.formats_common import log
import re
import bs4

# colorize_pinyin is required by this module: explain how to install it,
# then let the ImportError propagate to the importer.
try:
    import colorize_pinyin as color
except ImportError:
    log.error(
        "module colorize_pinyin is required to build extended Chinese indexes.\n"
        "You can install it by running: sudo pip3 install colorize-pinyin"
    )
    raise

from . import languages, log


def zh(titles, content):
    """
	Chinese indexes.

	assuming that content is HTML and pinyin is inside second tag
	(first is <h1>), we can try to parse pinyin and generate indexes
	with pinyin subwords separated by whitespaces
	- pinyin itself
	- pinyin with diacritics replaced by tone numbers
Пример #8
0
# names exported by a star-import of this package
__all__ = ['languages', 'log']

# registry mapping language name -> indexer function; it starts empty and is
# filled in by the submodules (contract described in the string below)
languages = {}
"""
Dict[str, Callable[[Sequence[str], str], Sequence[str]]]

submodules must register languages by adding (language name -> function)
pairs to the mapping.

function must follow the signature below:
    :param titles: flat iterable of title and alternative titles
    :param content: cleaned entry content
    :return: iterable of indexes (str).

use
```
    from . import languages
    # or
    from appledict.indexes import languages
```
"""

# absolute path of the directory that contains this file
here = os.path.dirname(os.path.abspath(__file__))

# Import every module found in that directory as a submodule of this
# package, so that each one gets a chance to register itself in `languages`.
for _, module, _ in pkgutil.iter_modules([here]):
    try:
        __import__('%s.%s' % (__name__, module))
    except ImportError:
        # a plugin that cannot be imported is logged with its traceback and
        # skipped; only ImportError is handled, anything else propagates
        log.error("error while importing indexes plugin %s" % module, exc_info=1)