def get_money_annotations(text: str, float_digits=4) \
        -> Generator[MoneyAnnotation, None, None]:
    """
    Find money amounts in ``text`` and yield one MoneyAnnotation per hit.

    A match of CURRENCY_PTN_RE is kept only when it captured a currency
    prefix, a currency postfix or a trigger word, and when get_amounts()
    produces exactly one value for the captured amount string.

    The currency code is resolved in this order:
      * prefix captured:  CURRENCY_SYMBOL_MAP, then CURRENCY_PREFIX_MAP,
        then the upper-cased prefix itself;
      * postfix captured: CURRENCY_TOKEN_MAP, then the upper-cased postfix;
      * neither (trigger word only): DEFAULT_CURRENCY.

    :param text: text to search
    :param float_digits: forwarded unchanged to get_amounts()
    :return: generator of MoneyAnnotation objects, in match order; ``coords``
        is the span of the whole regex match, ``text`` is the trimmed
        content of the ``text`` group
    """
    # Characters trimmed from both ends of the matched source text.  '$' is
    # deliberately left out of the trimmed set (presumably so that a leading
    # currency symbol survives).  Built once: it does not depend on the match.
    strip_chars = string.punctuation.replace('$', '') + string.whitespace

    for match in CURRENCY_PTN_RE.finditer(text):
        # capturesdict() maps each group name to the list of its captures.
        capture = match.capturesdict()
        prefix = capture['prefix']
        postfix = capture['postfix']

        # Ignore bare numbers: some currency marker or trigger word is needed.
        if not (prefix or postfix or capture['trigger_word']):
            continue

        amounts = list(
            get_amounts(capture['amount'][0], float_digits=float_digits))
        if len(amounts) != 1:
            continue

        currency_type = None
        if prefix:
            prefix_key = prefix[0].lower()
            currency_type = (CURRENCY_SYMBOL_MAP.get(prefix_key)
                             or CURRENCY_PREFIX_MAP.get(prefix_key)
                             or prefix_key.upper())
        elif postfix:
            currency_type = (CURRENCY_TOKEN_MAP.get(postfix[0].lower())
                             or postfix[0].upper())

        # A separate local is used so the ``text`` argument is never rebound
        # while its matches are still being iterated.
        source_text = capture['text'][0].strip(strip_chars)

        yield MoneyAnnotation(coords=match.span(),
                              amount=amounts[0],
                              text=source_text,
                              currency=currency_type or DEFAULT_CURRENCY)
def get_money(text, return_sources=False, float_digits=4) -> Generator:
    """
    Yield money amounts found in ``text`` as plain tuples.

    Only matches of CURRENCY_PTN_RE that captured a currency prefix or
    postfix are considered, and only when get_amounts() returns exactly one
    value for the captured amount string.

    :param text: text to search
    :param return_sources: when true, append the trimmed matched text to
        every yielded tuple
    :param float_digits: forwarded unchanged to get_amounts()
    :return: generator of ``(amount, currency)`` tuples, or
        ``(amount, currency, source_text)`` when ``return_sources`` is set
    """
    for found in CURRENCY_PTN_RE.finditer(text):
        groups = found.capturesdict()
        prefixes = groups['prefix']
        postfixes = groups['postfix']

        # Without any currency marker the number is not treated as money.
        if not prefixes and not postfixes:
            continue

        values = list(
            get_amounts(groups['amount'][0], float_digits=float_digits))
        if len(values) != 1:
            continue

        if prefixes:
            # Prefix wins over postfix; fall back to the marker itself.
            symbol = prefixes[0].lower()
            currency = CURRENCY_SYMBOL_MAP.get(symbol)
            if not currency:
                currency = CURRENCY_PREFIX_MAP.get(symbol)
            if not currency:
                currency = symbol.upper()
        else:
            token = postfixes[0]
            currency = CURRENCY_TOKEN_MAP.get(token.lower())
            if not currency:
                currency = token.upper()

        if return_sources:
            trim_chars = string.punctuation.replace('$', '') + string.whitespace
            yield values[0], currency, groups['text'][0].strip(trim_chars)
        else:
            yield values[0], currency
('chinese yuan', 'CNY'), ('dollars', 'USD'), ('dollar', 'USD'), ('euros', 'EUR'), ('euro', 'EUR'), ('pounds', 'GBP'), ('pound', 'GBP'), ('renminbi', 'CNY'), ('yens', 'JPY'), ('yen', 'JPY'), ('yuans', 'CNY'), ('yuan', 'CNY') ]) CURRENCY_ABBR_LIST = set( list(CURRENCY_SYMBOL_MAP.values()) + list(CURRENCY_TOKEN_MAP.values()) + list(CURRENCY_PREFIX_MAP.values()) ) CURRENCY_PREFIXES = set( list(CURRENCY_PREFIX_MAP.keys()) + list(CURRENCY_SYMBOL_MAP.values()) ) CURR_NUM_PTN = NUM_PTN.replace('(?<=\\W|^)', '') CURRENCY_PTN = r""" (?P<text> (?P<prefix>{currency_prefixes}|[{currency_symbols}])\s* (?P<amount>{num_ptn_1})
__version__ = "1.4.0" __maintainer__ = "LexPredict, LLC" __email__ = "*****@*****.**" DEFAULT_CURRENCY = 'USD' CURRENCY_TOKEN_MAP = OrderedDict([('chinese yuans', 'CNY'), ('chinese yuan', 'CNY'), ('dollars', 'USD'), ('dollar', 'USD'), ('euros', 'EUR'), ('euro', 'EUR'), ('pounds', 'GBP'), ('pound', 'GBP'), ('renminbi', 'CNY'), ('yens', 'JPY'), ('yen', 'JPY'), ('yuans', 'CNY'), ('yuan', 'CNY')]) CURRENCY_ABBR_LIST = set( list(CURRENCY_SYMBOL_MAP.values()) + list(CURRENCY_TOKEN_MAP.values()) + list(CURRENCY_PREFIX_MAP.values())) CURRENCY_PREFIXES = set( list(CURRENCY_PREFIX_MAP.keys()) + list(CURRENCY_SYMBOL_MAP.values())) CURR_NUM_PTN = NUM_PTN.replace('(?<=\\W|^)', '') TRIGGER_WORDS = ['price', 'cost'] CURRENCY_PTN = r""" (?P<text> (?P<prefix>{currency_prefixes}|[{currency_symbols}])\s* (?P<amount>{num_ptn_1}) | (?P<amount>{num_ptn_2})\s*