示例#1
0
def get_money_annotations(text: str, float_digits=4) \
        -> Generator[MoneyAnnotation, None, None]:
    """Yield a ``MoneyAnnotation`` for each usable currency match in *text*.

    Every match of ``CURRENCY_PTN_RE`` is inspected through its
    ``capturesdict()``.  A match is skipped when

    * it has no ``prefix``, no ``postfix`` and no ``trigger_word`` capture, or
    * ``get_amounts`` does not produce exactly one value for the first
      ``amount`` capture.

    Currency resolution:

    * with a prefix: the lower-cased prefix is looked up in
      ``CURRENCY_SYMBOL_MAP``, then ``CURRENCY_PREFIX_MAP``; if neither
      yields a truthy value the upper-cased prefix itself is used;
    * otherwise, with a postfix: the lower-cased postfix is looked up in
      ``CURRENCY_TOKEN_MAP``, falling back to the upper-cased postfix;
    * if nothing truthy was resolved, ``DEFAULT_CURRENCY`` is used.

    :param text: string to scan.
    :param float_digits: forwarded unchanged to ``get_amounts``.
    :return: generator of ``MoneyAnnotation`` objects whose ``coords`` is the
        match span and whose ``text`` is the first ``text`` capture stripped
        of surrounding whitespace and punctuation (``$`` is preserved).
    """
    # NOTE(review): capturesdict() is not part of the stdlib ``re`` API, so
    # CURRENCY_PTN_RE is presumably compiled with the ``regex`` package.
    for found in CURRENCY_PTN_RE.finditer(text):
        groups = found.capturesdict()
        prefixes = groups['prefix']
        postfixes = groups['postfix']

        # Nothing marks this number as money: no currency marker at all
        # and no trigger word either.
        if not (prefixes or postfixes or groups['trigger_word']):
            continue

        values = list(
            get_amounts(groups['amount'][0], float_digits=float_digits))
        if len(values) != 1:
            continue

        if prefixes:
            key = prefixes[0].lower()
            currency = (CURRENCY_SYMBOL_MAP.get(key)
                        or CURRENCY_PREFIX_MAP.get(key)
                        or key.upper())
        elif postfixes:
            raw_postfix = postfixes[0]
            currency = (CURRENCY_TOKEN_MAP.get(raw_postfix.lower())
                        or raw_postfix.upper())
        else:
            # Trigger-word-only match: no explicit currency marker.
            currency = None
        currency = currency or DEFAULT_CURRENCY

        # Trim surrounding punctuation/whitespace but keep a '$' sign.
        source_text = groups['text'][0].strip(
            string.punctuation.replace('$', '') + string.whitespace)

        yield MoneyAnnotation(coords=found.span(),
                              amount=values[0],
                              text=source_text,
                              currency=currency)
def get_money(text, return_sources=False, float_digits=4) -> Generator:
    """Yield ``(amount, currency)`` tuples for currency matches in *text*.

    Each match of ``CURRENCY_PTN_RE`` is examined through its
    ``capturesdict()``.  Matches without a ``prefix`` or ``postfix`` capture
    are skipped, as are matches for which ``get_amounts`` does not produce
    exactly one value from the first ``amount`` capture.

    The currency is taken from the prefix when present (lower-cased lookup
    in ``CURRENCY_SYMBOL_MAP``, then ``CURRENCY_PREFIX_MAP``, else the
    upper-cased prefix); otherwise from the postfix (lower-cased lookup in
    ``CURRENCY_TOKEN_MAP``, else the upper-cased postfix).

    :param text: string to scan.
    :param return_sources: when true, each tuple gets a third element: the
        first ``text`` capture stripped of surrounding whitespace and
        punctuation (``$`` is preserved).
    :param float_digits: forwarded unchanged to ``get_amounts``.
    :return: generator of 2-tuples, or 3-tuples if *return_sources* is set.
    """
    for found in CURRENCY_PTN_RE.finditer(text):
        groups = found.capturesdict()
        prefixes = groups['prefix']
        postfixes = groups['postfix']

        # A currency marker on at least one side of the number is required.
        if not prefixes and not postfixes:
            continue

        values = list(
            get_amounts(groups['amount'][0], float_digits=float_digits))
        if len(values) != 1:
            continue

        if prefixes:
            key = prefixes[0].lower()
            currency = (CURRENCY_SYMBOL_MAP.get(key)
                        or CURRENCY_PREFIX_MAP.get(key)
                        or key.upper())
        else:
            raw_postfix = postfixes[0]
            currency = (CURRENCY_TOKEN_MAP.get(raw_postfix.lower())
                        or raw_postfix.upper())

        if return_sources:
            # Trim surrounding punctuation/whitespace but keep a '$' sign.
            source_text = groups['text'][0].strip(
                string.punctuation.replace('$', '') + string.whitespace)
            yield values[0], currency, source_text
        else:
            yield values[0], currency
示例#3
0
    ('chinese yuan', 'CNY'),
    ('dollars', 'USD'),
    ('dollar', 'USD'),
    ('euros', 'EUR'),
    ('euro', 'EUR'),
    ('pounds', 'GBP'),
    ('pound', 'GBP'),
    ('renminbi', 'CNY'),
    ('yens', 'JPY'),
    ('yen', 'JPY'),
    ('yuans', 'CNY'),
    ('yuan', 'CNY')
])

# Every distinct value found in any of the three currency lookup maps.
# Despite the name, this is a set, not a list.
CURRENCY_ABBR_LIST = {
    *CURRENCY_SYMBOL_MAP.values(),
    *CURRENCY_TOKEN_MAP.values(),
    *CURRENCY_PREFIX_MAP.values(),
}

# Keys of the prefix map combined with the *values* of the symbol map.
# NOTE(review): presumably this lets a currency code itself act as a prefix
# in front of an amount -- confirm against CURRENCY_PTN.
CURRENCY_PREFIXES = {
    *CURRENCY_PREFIX_MAP.keys(),
    *CURRENCY_SYMBOL_MAP.values(),
}

# NUM_PTN with every occurrence of the leading look-behind text removed.
CURR_NUM_PTN = NUM_PTN.replace('(?<=\\W|^)', '')

CURRENCY_PTN = r"""
(?P<text>
(?P<prefix>{currency_prefixes}|[{currency_symbols}])\s*
(?P<amount>{num_ptn_1})
示例#4
0
# Package metadata.
__version__ = "1.4.0"
__maintainer__ = "LexPredict, LLC"
__email__ = "*****@*****.**"

# Currency code to fall back on when none can be resolved from matched text.
DEFAULT_CURRENCY = 'USD'

# Lower-case currency words mapped to their three-letter currency codes.
# Insertion order is kept (OrderedDict); within each family the longer
# spelling is listed before the shorter one.
# NOTE(review): presumably the order matters when the keys are joined into
# a regex alternation -- confirm before re-sorting.
CURRENCY_TOKEN_MAP = OrderedDict([
    ('chinese yuans', 'CNY'),
    ('chinese yuan', 'CNY'),
    ('dollars', 'USD'),
    ('dollar', 'USD'),
    ('euros', 'EUR'),
    ('euro', 'EUR'),
    ('pounds', 'GBP'),
    ('pound', 'GBP'),
    ('renminbi', 'CNY'),
    ('yens', 'JPY'),
    ('yen', 'JPY'),
    ('yuans', 'CNY'),
    ('yuan', 'CNY'),
])

# Every distinct value found in any of the three currency lookup maps.
# Despite the name, this is a set, not a list.
CURRENCY_ABBR_LIST = {
    *CURRENCY_SYMBOL_MAP.values(),
    *CURRENCY_TOKEN_MAP.values(),
    *CURRENCY_PREFIX_MAP.values(),
}

# Keys of the prefix map combined with the *values* of the symbol map.
# NOTE(review): presumably this lets a currency code itself act as a prefix
# in front of an amount -- confirm against CURRENCY_PTN.
CURRENCY_PREFIXES = {
    *CURRENCY_PREFIX_MAP.keys(),
    *CURRENCY_SYMBOL_MAP.values(),
}

# NUM_PTN with every occurrence of the leading look-behind text removed.
CURR_NUM_PTN = NUM_PTN.replace('(?<=\\W|^)', '')

# Words collected as money "trigger words".
# NOTE(review): presumably these feed the ``trigger_word`` group of
# CURRENCY_PTN -- confirm where the pattern is formatted.
TRIGGER_WORDS = ['price', 'cost']

CURRENCY_PTN = r"""
(?P<text>
    (?P<prefix>{currency_prefixes}|[{currency_symbols}])\s*
    (?P<amount>{num_ptn_1})
    |
    (?P<amount>{num_ptn_2})\s*