Example #1
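# Excerpted from addok's text helpers: PATTERN (a compiled regex), Token (a
# str subclass whose .update() returns a fresh token) and config are defined
# elsewhere in the package, so _normalize below is not calling str.update().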
def _tokenize(text):
    """Split text into a list of tokens."""
    return PATTERN.findall(text)


def tokenize(pipe):
    for text in pipe:
        for position, token in enumerate(_tokenize(text)):
            yield Token(token, position=position)


def _normalize(s):
    return s.update(unidecode(s.lower()))


normalize = yielder(_normalize)

SYNONYMS = {}


@config.on_load
def load_synonyms():
    path = config.SYNONYMS_PATH
    if not path:
        path = config.RESOURCES_ROOT / 'synonyms' / config.SYNONYMS_FILENAME
    with Path(path).open() as f:
        for line in f:
            if line.startswith('#'):
                continue
            synonyms, wanted = line.split('=>')
            wanted = wanted.strip()
            synonyms = synonyms.split(',')
            for synonym in synonyms:
                synonym = synonym.strip()
                if not synonym:
                    continue
                # map each alias to its canonical ("wanted") form
                SYNONYMS[synonym] = wanted
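
The @config.on_load decorator registers load_synonyms to run once addok's
configuration has been loaded, so SYNONYMS is filled (and refilled) whenever
the config is (re)loaded. A minimal sketch of such a hook registry, assuming
a simple callback list rather than addok's actual Config implementation:

class Config:
    """Toy config object with an on_load hook (illustrative only)."""
    def __init__(self):
        self._on_load = []

    def on_load(self, func):
        # Used as a decorator: remember the callback, return it unchanged.
        self._on_load.append(func)
        return func

    def load(self, **settings):
        self.__dict__.update(settings)
        for func in self._on_load:
            func()  # run every registered callback after loading

config = Config()

@config.on_load
def announce():
    print("loaded, SYNONYMS_PATH =", config.SYNONYMS_PATH)

config.load(SYNONYMS_PATH='synonyms.txt')  # prints: loaded, SYNONYMS_PATH = synonyms.txt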
Example #2

def _tokenize(text):
    """Split text into a list of tokens."""
    return PATTERN.findall(text)


def tokenize(pipe):
    for text in pipe:
        for position, token in enumerate(_tokenize(text)):
            yield Token(token, position=position)


def _normalize(s):
    return s.update(unidecode(s.lower()))


normalize = yielder(_normalize)


SYNONYMS = {}


def load_synonyms():
    with Path(config.SYNONYMS_PATH).open() as f:
        for line in f:
            if line.startswith('#'):
                continue
            synonyms, wanted = line.split('=>')
            wanted = wanted.strip()
            synonyms = synonyms.split(',')
            for synonym in synonyms:
                synonym = synonym.strip()
                if not synonym:
                    continue
                SYNONYMS[synonym] = wanted
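
The synonyms file format parsed above is simple: comma-separated aliases, a
'=>' separator, then the canonical form. A self-contained check of that
parsing logic with made-up entries (the sample lines are illustrative, not
addok's shipped resources):

import io

SAMPLE = io.StringIO(
    "# comments are skipped\n"
    "bd, bld, blvd => boulevard\n"
    "av => avenue\n"
)

SYNONYMS = {}
for line in SAMPLE:
    if line.startswith('#'):
        continue
    synonyms, wanted = line.split('=>')
    for synonym in synonyms.split(','):
        synonym = synonym.strip()
        if synonym:
            SYNONYMS[synonym] = wanted.strip()

print(SYNONYMS)
# {'bd': 'boulevard', 'bld': 'boulevard', 'blvd': 'boulevard', 'av': 'avenue'}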
Example #3
def _tokenize(text):
    """Split text into a list of tokens."""
    return PATTERN.findall(text)


def tokenize(pipe):
    for text in pipe:
        for position, token in enumerate(_tokenize(text)):
            yield Token(token, position=position)


def _normalize(s):
    return s.update(unidecode(s.lower()))


normalize = yielder(_normalize)


@config.on_load
def load_synonyms():
    config.SYNONYMS = {}
    path = config.SYNONYMS_PATH
    if not path:
        return  # pragma: no cover
    with Path(path).open() as f:
        for line in f:
            if line.startswith('#'):
                continue
            synonyms, wanted = line.split('=>')
            wanted = wanted.strip()
            synonyms = synonyms.split(',')
            for synonym in synonyms:
                synonym = synonym.strip()
                if not synonym:
                    continue
                config.SYNONYMS[synonym] = wanted
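
tokenize and normalize are plain generator stages, so a processing chain is
built simply by feeding one stage's output into the next. A self-contained
sketch of that composition, with minimal stand-ins for PATTERN and Token
(the real addok Token carries more state than just a position):

import re

PATTERN = re.compile(r'\w+')

class Token(str):
    """Stand-in token: a string that remembers its position in the query."""
    def __new__(cls, value, position=0):
        obj = str.__new__(cls, value)
        obj.position = position
        return obj

def tokenize(pipe):
    for text in pipe:
        for position, token in enumerate(PATTERN.findall(text)):
            yield Token(token, position=position)

def lowercase(pipe):
    for token in pipe:
        yield Token(token.lower(), position=token.position)

pipeline = lowercase(tokenize(['24 Avenue des Champs']))
print([(str(t), t.position) for t in pipeline])
# [('24', 0), ('avenue', 1), ('des', 2), ('champs', 3)]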
Example #4
from addok.helpers import yielder

from . import utils
try:
    import pkg_resources
except ImportError:  # pragma: no cover
    pass
else:
    if __package__:
        VERSION = pkg_resources.get_distribution(__package__).version


clean_query = yielder(utils.clean_query)
Example #5

from addok.helpers import yielder

from . import utils

phonemicize = yielder(utils.phonemicize)
Example #6
from addok.helpers import yielder

from . import utils
try:
    import pkg_resources
except ImportError:  # pragma: no cover
    pass
else:
    if __package__:
        VERSION = pkg_resources.get_distribution(__package__).version

clean_query = yielder(utils.clean_query)
extract_address = yielder(utils.extract_address)
glue_ordinal = utils.glue_ordinal
fold_ordinal = yielder(utils.fold_ordinal)
flag_housenumber = utils.flag_housenumber
make_labels = utils.make_labels
remove_leading_zeros = yielder(utils.remove_leading_zeros)
Example #7
from addok.helpers import yielder

from . import utils

try:
    import pkg_resources
except ImportError:  # pragma: no cover
    pass
else:
    if __package__:
        VERSION = pkg_resources.get_distribution(__package__).version


clean_query = yielder(utils.clean_query)
extract_address = yielder(utils.extract_address)
fold_ordinal = yielder(utils.fold_ordinal)
glue_ordinal = yielder(utils.glue_ordinal)
make_labels = utils.make_labels
remove_leading_zeros = yielder(utils.remove_leading_zeros)
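
Examples #4 to #7 mix two kinds of pipeline stages: helpers such as
make_labels (and glue_ordinal in Example #6) already consume the whole pipe,
while per-item functions such as clean_query must be adapted with yielder
first. A minimal equivalent of that adapter, assuming yielder simply maps a
one-item function over the stream (a sketch, not addok's exact code):

from functools import wraps

def yielder(func):
    """Lift a one-item function into a generator-based pipeline stage."""
    @wraps(func)
    def wrapper(pipe):
        for item in pipe:
            yield func(item)
    return wrapper

# Per-item helper: needs the adapter to fit the pipeline protocol.
@yielder
def strip_punctuation(token):
    return token.strip('.,;')

# Pipe-level helper: already a generator over the stream, used as-is.
def drop_empty(pipe):
    for token in pipe:
        if token:
            yield token

print(list(drop_empty(strip_punctuation(['24,', 'rue', '.', 'Pasteur.']))))
# ['24', 'rue', 'Pasteur']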