def join(cls, phrases, language, country=None):
    """Join phrases with a connector drawn from the address config.

    The connector phrase and its properties come from the alternatives
    registered under ``cls.key`` for the given language/country. The last
    ``max_phrase_join`` phrases (default 2) are joined with the connector;
    any phrases before those are joined with the ``default_join`` separator
    and placed in front of the result.

    :param phrases: iterable of phrases; each is passed through ``safe_decode``
    :param language: language passed to the config lookup
    :param country: optional country passed to the config lookup
    :returns: the joined unicode string
    :raises ValueError: if ``phrases`` has no ``__iter__`` attribute
    """
    if not hasattr(phrases, '__iter__'):
        raise ValueError('Param phrases must be iterable')

    choices, weights = address_config.alternative_probabilities(
        cls.key, language, country=country)
    connector, settings = weighted_choice(choices, weights)

    use_whitespace = settings.get('whitespace', True)
    if use_whitespace:
        padding = six.u(' ')
    else:
        padding = six.u('')

    decoded = [safe_decode(item) for item in phrases]
    tail_size = settings.get('max_phrase_join', 2)

    lead = six.u('')
    if len(decoded) > tail_size:
        fallback_join = (cls.DEFAULT_WHITESPACE_JOIN if use_whitespace
                         else cls.DEFAULT_NON_WHITESPACE_JOIN)
        separator = safe_decode(settings.get('default_join', fallback_join))
        # The trailing empty string makes the separator also follow the
        # last of the leading phrases.
        lead = separator.join(decoded[:-tail_size] + [six.u('')])

    if use_whitespace:
        connector = six.u('{}{}{}').format(padding, connector, padding)

    tail = connector.join(decoded[-tail_size:])
    return six.u('').join([lead, tail])
def join(cls, phrases, language, country=None):
    """Combine ``phrases`` into one string using a configured joining phrase.

    A joining phrase and its properties are selected with ``weighted_choice``
    from the alternatives stored under ``cls.key``. The final
    ``max_phrase_join`` phrases (2 unless configured) are joined with that
    phrase, padded with spaces when the ``whitespace`` property is truthy.
    Earlier phrases are joined with ``default_join`` and prepended.

    :param phrases: iterable of phrases, decoded with ``safe_decode``
    :param language: language for the config lookup
    :param country: optional country for the config lookup
    :returns: unicode string containing all phrases
    :raises ValueError: when ``phrases`` lacks ``__iter__``
    """
    if not hasattr(phrases, '__iter__'):
        raise ValueError('Param phrases must be iterable')

    options, distribution = address_config.alternative_probabilities(
        cls.key, language, country=country)
    glue, glue_props = weighted_choice(options, distribution)

    spaced = glue_props.get('whitespace', True)
    space = six.u(' ') if spaced else six.u('')

    parts = [safe_decode(part) for part in phrases]
    limit = glue_props.get('max_phrase_join', 2)

    if len(parts) > limit:
        if spaced:
            default_separator = cls.DEFAULT_WHITESPACE_JOIN
        else:
            default_separator = cls.DEFAULT_NON_WHITESPACE_JOIN
        overflow_join = safe_decode(
            glue_props.get('default_join', default_separator))
        # Appending an empty item leaves a separator after the overflow part.
        overflow = parts[:-limit] + [six.u('')]
        head = overflow_join.join(overflow)
    else:
        head = six.u('')

    if spaced:
        glue = six.u('{}{}{}').format(space, glue, space)

    body = glue.join(parts[-limit:])
    return six.u('').join([head, body])
def phrase(cls, language, country=None):
    """Return an intersection phrase for the given language/country.

    Looks up the alternatives under ``'cross_streets.intersection'`` and
    selects one with ``weighted_choice``.

    :param language: language passed to the config lookup
    :param country: optional country passed to the config lookup
    :returns: the selected phrase, or ``None`` when no alternatives exist
    """
    candidates, weights = address_config.alternative_probabilities(
        'cross_streets.intersection', language, country=country)
    if candidates:
        chosen, _ = weighted_choice(candidates, weights)
        return chosen
    return None
def phrase(cls, unit, language, country=None, zone=None):
    """Build a unit phrase for ``unit``, or a standalone unit phrase.

    With a unit, the config key is ``'units.alphanumeric'`` (or
    ``'units.zones.<zone>'`` when ``zone`` is given). The unit may be
    rewritten by ``cls.add_direction`` or ``cls.add_quadrant`` when the
    matching config flags are set, and is then rendered through
    ``cls.numeric_phrase``.

    Without a unit, a phrase is selected from ``'units.standalone'`` and
    returned title-cased.

    :param unit: unit identifier, or ``None`` for a standalone phrase
    :param language: language passed to the config lookups
    :param country: optional country passed to the config lookups
    :param zone: optional zone name used to build the config key
    :returns: the phrase, or ``None`` when the config has nothing for the key
    """
    if unit is None:
        key = 'units.standalone'
        values, probs = address_config.alternative_probabilities(
            key, language, dictionaries=['unit_types_standalone'],
            country=country)
        # Falsy check (not just ``is None``) so an empty alternatives list
        # is handled like a missing one, as the sibling phrase methods do.
        if not values:
            return None
        chosen, _ = weighted_choice(values, probs)
        return chosen.title()

    key = 'units.alphanumeric' if zone is None else 'units.zones.{}'.format(zone)

    if not address_config.get_property(key, language, country=country):
        return None

    is_alpha = safe_decode(unit).isalpha()

    direction_unit = None
    add_direction = address_config.get_property(
        '{}.add_direction'.format(key), language, country=country)
    if add_direction:
        direction_unit = cls.add_direction(key, unit, language, country=country)

    if direction_unit and direction_unit != unit:
        # The unit was rewritten, so it is no longer treated as purely
        # alphabetic.
        unit = direction_unit
        is_alpha = False
    else:
        add_quadrant = address_config.get_property(
            '{}.add_quadrant'.format(key), language, country=country)
        if add_quadrant:
            unit = cls.add_quadrant(key, unit, language, country=country)
            is_alpha = False

    return cls.numeric_phrase(key, safe_decode(unit), language,
                              dictionaries=['unit_types_numbered'],
                              country=country, is_alpha=is_alpha)
def pick_phrase_and_type(cls, number, language, country=None):
    """Select a phrase and the numeric type used to combine it with a number.

    A phrase is selected from the alternatives under ``cls.key``. The
    numeric type is then selected between ``cls.NUMERIC`` and
    ``cls.NUMERIC_AFFIX`` using their ``<type>_probability`` properties;
    when neither probability is configured, ``cls.NUMERIC`` is used.

    :param number: the number; only used in the no-alternatives result
    :param language: language passed to the config lookup
    :param country: optional country passed to the config lookup
    :returns: ``(num_type, phrase, props_for_num_type)``; when there are no
        alternatives, ``(None, decoded number or None, None)``
    """
    candidates, weights = address_config.alternative_probabilities(
        cls.key, language, dictionaries=cls.dictionaries, country=country)
    if not candidates:
        decoded = None if number is None else safe_decode(number)
        return None, decoded, None

    phrase, phrase_props = weighted_choice(candidates, weights)

    type_names = []
    type_weights = []
    for candidate_type in (cls.NUMERIC, cls.NUMERIC_AFFIX):
        weight = phrase_props.get('{}_probability'.format(candidate_type), None)
        if weight is None:
            continue
        type_names.append(candidate_type)
        type_weights.append(weight)

    if type_weights:
        num_type = weighted_choice(type_names, cdf(type_weights))
    else:
        num_type = cls.NUMERIC

    return num_type, phrase, phrase_props[num_type]
def phrase(cls, unit, language, country=None, zone=None):
    """Return a phrase describing ``unit``, or a standalone unit phrase.

    For a non-``None`` unit the config key is ``'units.zones.<zone>'`` when
    ``zone`` is given and ``'units.alphanumeric'`` otherwise. If that key
    has no config, ``None`` is returned. The unit may be replaced by the
    result of ``cls.add_direction`` or, failing that, ``cls.add_quadrant``
    (each gated by its config flag) before being passed to
    ``cls.numeric_phrase``.

    For ``unit is None`` a phrase from ``'units.standalone'`` is returned
    in title case.

    :param unit: unit identifier or ``None``
    :param language: language for config lookups
    :param country: optional country for config lookups
    :param zone: optional zone name selecting a zone-specific config key
    :returns: phrase string, or ``None`` when no config is available
    """
    if unit is not None:
        if zone is None:
            key = 'units.alphanumeric'
        else:
            key = 'units.zones.{}'.format(zone)

        if not address_config.get_property(key, language, country=country):
            return None

        is_alpha = safe_decode(unit).isalpha()

        direction_unit = None
        if address_config.get_property('{}.add_direction'.format(key),
                                       language, country=country):
            direction_unit = cls.add_direction(key, unit, language,
                                               country=country)

        if direction_unit and direction_unit != unit:
            # A rewritten unit is no longer flagged as purely alphabetic.
            unit = direction_unit
            is_alpha = False
        elif address_config.get_property('{}.add_quadrant'.format(key),
                                         language, country=country):
            unit = cls.add_quadrant(key, unit, language, country=country)
            is_alpha = False

        return cls.numeric_phrase(key, safe_decode(unit), language,
                                  dictionaries=['unit_types_numbered'],
                                  country=country, is_alpha=is_alpha)

    key = 'units.standalone'
    values, probs = address_config.alternative_probabilities(
        key, language, dictionaries=['unit_types_standalone'], country=country)
    # Use a falsy check, like the other phrase methods, so that an empty
    # alternatives list returns None instead of reaching weighted_choice.
    if not values:
        return None
    standalone, _ = weighted_choice(values, probs)
    return standalone.title()
def phrase(cls, language, key, value, is_plural=False, country=None):
    """Build a ``CategoryQuery`` for the category identified by key/value.

    The category phrase comes from ``category_config.get_phrase``. A
    preposition type is drawn with ``CategoryPreposition.random`` and, when
    one applies, a preposition phrase is selected from the
    ``'categories.<type>'`` alternatives.

    :param language: language for the category and preposition lookups
    :param key: category key passed to ``category_config.get_phrase``
    :param value: category value passed to ``category_config.get_phrase``
    :param is_plural: passed through to ``category_config.get_phrase``
    :param country: optional country for the preposition lookups
    :returns: ``NULL_CATEGORY_QUERY`` when there is no category phrase,
        otherwise a ``CategoryQuery``
    """
    raw_phrase = category_config.get_phrase(language, key, value,
                                            is_plural=is_plural)
    if not raw_phrase:
        return NULL_CATEGORY_QUERY

    category_phrase = safe_decode(raw_phrase)
    prep_type = CategoryPreposition.random(language, country=country)

    prepositions = None
    weights = None
    if prep_type not in (None, CategoryPreposition.NULL):
        prepositions, weights = address_config.alternative_probabilities(
            'categories.{}'.format(prep_type), language, country=country)

    # No preposition type, or no phrases configured for it: bare query.
    if not prepositions:
        return CategoryQuery(category_phrase, prep=None,
                             add_place_name=True, add_address=True)

    chosen_prep, _ = weighted_choice(prepositions, weights)
    chosen_prep = safe_decode(chosen_prep)

    nearby_types = (CategoryPreposition.NEARBY, CategoryPreposition.NEAR_ME)
    add_place_name = prep_type not in nearby_types
    add_address = prep_type not in nearby_types + (CategoryPreposition.IN,)

    return CategoryQuery(category_phrase, prep=chosen_prep,
                         add_place_name=add_place_name,
                         add_address=add_address)
def phrase(cls, chain, language, country=None):
    """Build a ``ChainQuery`` for a chain name.

    A preposition type is drawn with ``CategoryPreposition.random``. When
    one applies and the ``'categories.<type>'`` config has alternatives, a
    preposition phrase is selected and attached to the query.

    :param chain: chain name; a falsy value yields ``NULL_CHAIN_QUERY``
    :param language: language for the preposition lookups
    :param country: optional country for the preposition lookups
    :returns: ``NULL_CHAIN_QUERY`` or a ``ChainQuery``
    """
    if not chain:
        return NULL_CHAIN_QUERY

    chain_phrase = safe_decode(chain)
    prep_type = CategoryPreposition.random(language, country=country)

    prepositions = None
    weights = None
    if prep_type not in (None, CategoryPreposition.NULL):
        prepositions, weights = address_config.alternative_probabilities(
            'categories.{}'.format(prep_type), language, country=country)

    # No preposition type, or no phrases configured for it: bare query.
    if not prepositions:
        return ChainQuery(chain_phrase, prep=None,
                          add_place_name=True, add_address=True)

    chosen_prep, _ = weighted_choice(prepositions, weights)
    chosen_prep = safe_decode(chosen_prep)

    nearby_types = (CategoryPreposition.NEARBY, CategoryPreposition.NEAR_ME)
    add_place_name = prep_type not in nearby_types
    add_address = prep_type not in nearby_types + (CategoryPreposition.IN,)

    return ChainQuery(chain_phrase, prep=chosen_prep,
                      add_place_name=add_place_name,
                      add_address=add_address)
def phrase(cls, chain, language, country=None):
    """Return a ``ChainQuery`` for ``chain``, optionally with a preposition.

    The preposition type comes from ``CategoryPreposition.random``. If it is
    ``None``/``CategoryPreposition.NULL``, or no alternatives exist under
    ``'categories.<type>'``, the query has no preposition and both
    ``add_place_name`` and ``add_address`` are ``True``.

    :param chain: chain name; falsy input returns ``NULL_CHAIN_QUERY``
    :param language: language for the config lookups
    :param country: optional country for the config lookups
    :returns: ``NULL_CHAIN_QUERY`` or a ``ChainQuery``
    """
    if not chain:
        return NULL_CHAIN_QUERY

    decoded_chain = safe_decode(chain)
    kind = CategoryPreposition.random(language, country=country)

    if kind in (None, CategoryPreposition.NULL):
        return ChainQuery(decoded_chain, prep=None, add_place_name=True,
                          add_address=True)

    config_key = 'categories.{}'.format(kind)
    options, distribution = address_config.alternative_probabilities(
        config_key, language, country=country)
    if not options:
        return ChainQuery(decoded_chain, prep=None, add_place_name=True,
                          add_address=True)

    picked = weighted_choice(options, distribution)
    preposition, _ = picked
    preposition = safe_decode(preposition)

    no_address_kinds = (CategoryPreposition.NEARBY,
                        CategoryPreposition.NEAR_ME,
                        CategoryPreposition.IN)
    no_place_name_kinds = (CategoryPreposition.NEARBY,
                           CategoryPreposition.NEAR_ME)

    return ChainQuery(decoded_chain, prep=preposition,
                      add_place_name=kind not in no_place_name_kinds,
                      add_address=kind not in no_address_kinds)
def phrase(cls, language, key, value, is_plural=False, country=None):
    """Return a ``CategoryQuery`` for a category, optionally with a preposition.

    The category phrase is looked up with ``category_config.get_phrase``.
    The preposition type comes from ``CategoryPreposition.random``. If it is
    ``None``/``CategoryPreposition.NULL``, or no alternatives exist under
    ``'categories.<type>'``, the query has no preposition and both
    ``add_place_name`` and ``add_address`` are ``True``.

    :param language: language for the lookups
    :param key: category key
    :param value: category value
    :param is_plural: forwarded to ``category_config.get_phrase``
    :param country: optional country for the preposition lookups
    :returns: ``NULL_CATEGORY_QUERY`` when no category phrase is found,
        otherwise a ``CategoryQuery``
    """
    looked_up = category_config.get_phrase(language, key, value,
                                           is_plural=is_plural)
    if not looked_up:
        return NULL_CATEGORY_QUERY

    decoded_category = safe_decode(looked_up)
    kind = CategoryPreposition.random(language, country=country)

    if kind in (None, CategoryPreposition.NULL):
        return CategoryQuery(decoded_category, prep=None, add_place_name=True,
                             add_address=True)

    config_key = 'categories.{}'.format(kind)
    options, distribution = address_config.alternative_probabilities(
        config_key, language, country=country)
    if not options:
        return CategoryQuery(decoded_category, prep=None, add_place_name=True,
                             add_address=True)

    picked = weighted_choice(options, distribution)
    preposition, _ = picked
    preposition = safe_decode(preposition)

    no_address_kinds = (CategoryPreposition.NEARBY,
                        CategoryPreposition.NEAR_ME,
                        CategoryPreposition.IN)
    no_place_name_kinds = (CategoryPreposition.NEARBY,
                           CategoryPreposition.NEAR_ME)

    return CategoryQuery(decoded_category, prep=preposition,
                         add_place_name=kind not in no_place_name_kinds,
                         add_address=kind not in no_address_kinds)
def pick_phrase_and_type(cls, number, language, country=None):
    """Choose a phrase plus the numeric type for combining it with a number.

    The phrase is selected from the ``cls.key`` alternatives. For each of
    ``cls.NUMERIC`` and ``cls.NUMERIC_AFFIX`` the phrase's
    ``<type>_probability`` property is read; the configured ones form the
    distribution the type is drawn from. With none configured the type is
    ``cls.NUMERIC``.

    :param number: the number; only used when no alternatives exist
    :param language: language for the config lookup
    :param country: optional country for the config lookup
    :returns: ``(num_type, phrase, phrase_props[num_type])``, or
        ``(None, decoded number or None, None)`` without alternatives
    """
    options, distribution = address_config.alternative_probabilities(
        cls.key, language, dictionaries=cls.dictionaries, country=country)
    if not options:
        if number is not None:
            fallback_number = safe_decode(number)
        else:
            fallback_number = None
        return None, fallback_number, None

    phrase, phrase_props = weighted_choice(options, distribution)

    looked_up = [
        (kind, phrase_props.get('{}_probability'.format(kind), None))
        for kind in (cls.NUMERIC, cls.NUMERIC_AFFIX)
    ]
    configured = [(kind, prob) for kind, prob in looked_up if prob is not None]

    if not configured:
        num_type = cls.NUMERIC
    else:
        kinds = [kind for kind, _ in configured]
        kind_probs = cdf([prob for _, prob in configured])
        num_type = weighted_choice(kinds, kind_probs)

    return num_type, phrase, phrase_props[num_type]
def numeric_phrase(cls, key, num, language, country=None, dictionaries=(), strict_numeric=False, is_alpha=False):
    """Render ``num`` together with a phrase selected from the config.

    Outline:

    1. Classify ``num``: parseable by ``int``, parseable by ``float``, or
       tokenized to find numeric tokens and alphabetic characters.
    2. Select a phrase from ``'<key>.alpha'`` (only when ``is_alpha``) or,
       if that yields nothing, from ``key``.
    3. Select how to express the number: ``standalone``, ``null``,
       ``numeric``, ``numeric_affix`` or ``ordinal``.
    4. Apply the selected type's properties (digit rewriting, number
       phrase, affix, ordinal suffix, whitespace, direction) and format.

    :param key: config key for the phrase alternatives
    :param num: the number/identifier, or ``None``
    :param language: language for the config lookups
    :param country: optional country for the config lookups
    :param dictionaries: dictionaries passed to the config lookups
    :param strict_numeric: if true, a ``num`` containing alphabetic
        characters is returned decoded, without a phrase
    :param is_alpha: if true, try the ``'<key>.alpha'`` alternatives first
    :returns: a unicode string, or ``None`` when ``num`` is ``None`` and no
        alternatives exist, when no usable number type remains for an
        alphanumeric ``num``, or when an integer ``num`` falls outside the
        configured min/max bounds
    """
    has_alpha = False
    has_numeric = True
    is_integer = False
    is_none = num is None

    if not is_none:
        try:
            int(num)
            is_integer = True
        except ValueError:
            try:
                # Only probing whether num parses as a float.
                float(num)
            except ValueError:
                tokens = tokenize(safe_decode(num))
                has_numeric = False
                for t, c in tokens:
                    if c == token_types.NUMERIC:
                        has_numeric = True
                    if any((ch.isalpha() for ch in t)):
                        has_alpha = True

                if strict_numeric and has_alpha:
                    return safe_decode(num)

    values, probs = None, None

    if is_alpha:
        values, probs = address_config.alternative_probabilities(
            '{}.alpha'.format(key), language,
            dictionaries=dictionaries, country=country)

    # Pick a phrase given the probability distribution from the config
    if values is None:
        values, probs = address_config.alternative_probabilities(
            key, language, dictionaries=dictionaries, country=country)

    if not values:
        return safe_decode(num) if not is_none else None

    phrase, phrase_props = weighted_choice(values, probs)

    values = []
    probs = []

    # Dictionaries are lowercased, so title case here
    if phrase_props.get('title_case', True):
        phrase = phrase.title()

    # There are a few ways we can express the number itself
    #
    # 1. Alias it as some standalone word like basement (for floor "-1")
    # 2. Use the number itself, so "Floor 2"
    # 3. Append/prepend an affix e.g. 2/F for second floor
    # 4. As an ordinal expression e.g. "2nd Floor"
    have_standalone = False
    have_null = False
    for num_type in ('standalone', 'null', 'numeric', 'numeric_affix', 'ordinal'):
        # Named prob_key so the ``key`` parameter is not shadowed.
        prob_key = '{}_probability'.format(num_type)
        prob = phrase_props.get(prob_key)
        if prob is not None:
            if num_type == 'standalone':
                have_standalone = True
            elif num_type == 'null':
                have_null = True
            values.append(num_type)
            probs.append(prob)
        elif num_type in phrase_props:
            # A type configured without a probability ends the scan.
            values.append(num_type)
            probs.append(1.0)
            break

    if not probs or is_none:
        return phrase

    # If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items
    if has_alpha:
        allowed = [(v, p) for v, p in zip(values, probs)
                   if v in ('numeric', 'null', 'standalone')]
        # zip(*[]) cannot be unpacked into two names, so handle the case
        # where nothing is left the same way as a zero total below.
        if not allowed:
            return None
        values, probs = zip(*allowed)
        total = float(sum(probs))
        if isclose(total, 0.0):
            return None

        probs = [p / total for p in probs]

    probs = cdf(probs)

    if len(values) < 2:
        if have_standalone:
            num_type = 'standalone'
        elif have_null:
            num_type = 'null'
        else:
            num_type = 'numeric'
    else:
        num_type = weighted_choice(values, probs)

    if num_type == 'standalone':
        return phrase
    elif num_type == 'null':
        return safe_decode(num)

    props = phrase_props[num_type]

    if is_integer:
        num_int = int(num)
        if phrase_props.get('number_abs_value', False):
            num_int = abs(num_int)
            num = num_int

        if 'number_min_abs_value' in phrase_props and num_int < phrase_props['number_min_abs_value']:
            return None

        if 'number_max_abs_value' in phrase_props and num_int > phrase_props['number_max_abs_value']:
            return None

        if phrase_props.get('number_subtract_abs_value'):
            num_int -= phrase_props['number_subtract_abs_value']
            num = num_int

    num = safe_decode(num)
    digits_props = props.get('digits')
    if digits_props:
        # props was returned by the config lookup; work on a copy so the
        # inherited keys are not written back into that object.
        digits_props = dict(digits_props)
        # Inherit the gender and category e.g. for ordinals
        for k in ('gender', 'category'):
            if k in props:
                digits_props[k] = props[k]
        num = Digits.rewrite(
            num, language, digits_props,
            num_type=Digits.CARDINAL if num_type != 'ordinal' else Digits.ORDINAL)

    # Do we add the numeric phrase e.g. Floor No 1
    add_number_phrase = props.get('add_number_phrase', False)
    if add_number_phrase and random.random() < props['add_number_phrase_probability']:
        num = Number.phrase(num, language, country=country)

    whitespace_default = True

    if num_type == 'numeric_affix':
        phrase = props['affix']
        if props.get('upper_case', True):
            phrase = phrase.upper()
        if 'zero_pad' in props and num.isdigit():
            num = num.rjust(props['zero_pad'], props.get('zero_char', '0'))
        whitespace_default = False
    elif num_type == 'ordinal' and safe_decode(num).isdigit():
        ordinal_expression = ordinal_expressions.suffixed_number(
            num, language, gender=props.get('gender', None))

        if ordinal_expression is not None:
            num = ordinal_expression

    if 'null_phrase_probability' in props and (
            num_type == 'ordinal' or
            (has_alpha and (has_numeric or 'null_phrase_alpha_only' in props))):
        if random.random() < props['null_phrase_probability']:
            return num

    direction = props['direction']
    whitespace = props.get('whitespace', whitespace_default)

    whitespace_probability = props.get('whitespace_probability')
    if whitespace_probability is not None:
        whitespace = random.random() < whitespace_probability

    # Occasionally switch up if direction_probability is specified
    if random.random() > props.get('direction_probability', 1.0):
        if direction == 'left':
            direction = 'right'
        elif direction == 'right':
            direction = 'left'

    whitespace_phrase = six.u(' ') if whitespace else six.u('')
    # Phrase goes to the left of the number
    if direction == 'left':
        return six.u('{}{}{}').format(phrase, whitespace_phrase, num)
    # Phrase goes to the right of the number
    elif direction == 'right':
        return six.u('{}{}{}').format(num, whitespace_phrase, phrase)
    # Need to specify a direction, otherwise return naked number
    else:
        return safe_decode(num)
def numeric_phrase(cls, key, num, language, country=None, dictionaries=(), strict_numeric=False, is_alpha=False):
    """Combine ``num`` with a phrase selected from the ``key`` config.

    ``num`` is first classified (``int``-parseable, ``float``-parseable, or
    tokenized to detect numeric tokens and alphabetic characters). A phrase
    is selected from ``'<key>.alpha'`` when ``is_alpha`` is set and that
    lookup returns alternatives, otherwise from ``key``. One of the number
    types ``standalone``, ``null``, ``numeric``, ``numeric_affix`` or
    ``ordinal`` is then selected from the phrase's properties, and the
    number and phrase are formatted according to that type's properties.

    :param key: config key for the phrase alternatives
    :param num: the number/identifier, or ``None``
    :param language: language for the config lookups
    :param country: optional country for the config lookups
    :param dictionaries: dictionaries passed to the config lookups
    :param strict_numeric: when true and ``num`` contains alphabetic
        characters, return ``num`` decoded with no phrase attached
    :param is_alpha: when true, prefer the ``'<key>.alpha'`` alternatives
    :returns: unicode string; ``None`` when ``num`` is ``None`` and no
        alternatives exist, when an alphanumeric ``num`` has no usable
        number type left, or when an integer ``num`` is outside the
        configured ``number_min_abs_value``/``number_max_abs_value``
    """
    # Number types that still make sense for values like "A" or "2L".
    alpha_compatible_types = ('numeric', 'null', 'standalone')

    has_alpha = False
    has_numeric = True
    is_integer = False
    is_none = False

    if num is not None:
        try:
            int(num)
            is_integer = True
        except ValueError:
            try:
                # Result unused: this only checks that num parses as a float.
                float(num)
            except ValueError:
                has_numeric = False
                for t, c in tokenize(safe_decode(num)):
                    if c == token_types.NUMERIC:
                        has_numeric = True
                    if any((ch.isalpha() for ch in t)):
                        has_alpha = True

                if strict_numeric and has_alpha:
                    return safe_decode(num)
    else:
        is_none = True

    values, probs = None, None

    if is_alpha:
        values, probs = address_config.alternative_probabilities(
            '{}.alpha'.format(key), language,
            dictionaries=dictionaries, country=country)

    # Pick a phrase given the probability distribution from the config
    if values is None:
        values, probs = address_config.alternative_probabilities(
            key, language, dictionaries=dictionaries, country=country)

    if not values:
        return safe_decode(num) if not is_none else None

    phrase, phrase_props = weighted_choice(values, probs)

    # Dictionaries are lowercased, so title case here
    if phrase_props.get('title_case', True):
        phrase = phrase.title()

    # There are a few ways we can express the number itself
    #
    # 1. Alias it as some standalone word like basement (for floor "-1")
    # 2. Use the number itself, so "Floor 2"
    # 3. Append/prepend an affix e.g. 2/F for second floor
    # 4. As an ordinal expression e.g. "2nd Floor"
    values = []
    probs = []
    have_standalone = False
    have_null = False
    for num_type in ('standalone', 'null', 'numeric', 'numeric_affix', 'ordinal'):
        # Separate name so the ``key`` argument is not overwritten.
        probability_key = '{}_probability'.format(num_type)
        prob = phrase_props.get(probability_key)
        if prob is not None:
            if num_type == 'standalone':
                have_standalone = True
            elif num_type == 'null':
                have_null = True
            values.append(num_type)
            probs.append(prob)
        elif num_type in phrase_props:
            # Type present without a probability: take it and stop scanning.
            values.append(num_type)
            probs.append(1.0)
            break

    if not probs or is_none:
        return phrase

    # If we're using something like "Floor A" or "Unit 2L", remove ordinal/affix items
    if has_alpha:
        kept = [(v, p) for v, p in zip(values, probs)
                if v in alpha_compatible_types]
        # Unpacking zip(*[]) into two names raises ValueError; with nothing
        # left to choose from, return None as for a zero total below.
        if not kept:
            return None
        values, probs = zip(*kept)
        total = float(sum(probs))
        if isclose(total, 0.0):
            return None

        probs = [p / total for p in probs]

    probs = cdf(probs)

    if len(values) < 2:
        if have_standalone:
            num_type = 'standalone'
        elif have_null:
            num_type = 'null'
        else:
            num_type = 'numeric'
    else:
        num_type = weighted_choice(values, probs)

    if num_type == 'standalone':
        return phrase
    elif num_type == 'null':
        return safe_decode(num)

    props = phrase_props[num_type]

    if is_integer:
        num_int = int(num)
        if phrase_props.get('number_abs_value', False):
            num_int = abs(num_int)
            num = num_int

        if 'number_min_abs_value' in phrase_props and num_int < phrase_props['number_min_abs_value']:
            return None

        if 'number_max_abs_value' in phrase_props and num_int > phrase_props['number_max_abs_value']:
            return None

        if phrase_props.get('number_subtract_abs_value'):
            num_int -= phrase_props['number_subtract_abs_value']
            num = num_int

    num = safe_decode(num)
    digits_props = props.get('digits')
    if digits_props:
        # Copy before adding inherited keys: props came from the config
        # lookup and should not be modified by this call.
        digits_props = dict(digits_props)
        # Inherit the gender and category e.g. for ordinals
        for k in ('gender', 'category'):
            if k in props:
                digits_props[k] = props[k]
        digits_type = Digits.CARDINAL if num_type != 'ordinal' else Digits.ORDINAL
        num = Digits.rewrite(num, language, digits_props, num_type=digits_type)

    # Do we add the numeric phrase e.g. Floor No 1
    add_number_phrase = props.get('add_number_phrase', False)
    if add_number_phrase and random.random() < props['add_number_phrase_probability']:
        num = Number.phrase(num, language, country=country)

    whitespace_default = True

    if num_type == 'numeric_affix':
        phrase = props['affix']
        if props.get('upper_case', True):
            phrase = phrase.upper()
        if 'zero_pad' in props and num.isdigit():
            num = num.rjust(props['zero_pad'], props.get('zero_char', '0'))
        whitespace_default = False
    elif num_type == 'ordinal' and safe_decode(num).isdigit():
        ordinal_expression = ordinal_expressions.suffixed_number(
            num, language, gender=props.get('gender', None))

        if ordinal_expression is not None:
            num = ordinal_expression

    if 'null_phrase_probability' in props and (
            num_type == 'ordinal' or
            (has_alpha and (has_numeric or 'null_phrase_alpha_only' in props))):
        if random.random() < props['null_phrase_probability']:
            return num

    direction = props['direction']
    whitespace = props.get('whitespace', whitespace_default)

    whitespace_probability = props.get('whitespace_probability')
    if whitespace_probability is not None:
        whitespace = random.random() < whitespace_probability

    # Occasionally switch up if direction_probability is specified
    if random.random() > props.get('direction_probability', 1.0):
        if direction == 'left':
            direction = 'right'
        elif direction == 'right':
            direction = 'left'

    whitespace_phrase = six.u(' ') if whitespace else six.u('')
    # Phrase goes to the left of the number
    if direction == 'left':
        return six.u('{}{}{}').format(phrase, whitespace_phrase, num)
    # Phrase goes to the right of the number
    elif direction == 'right':
        return six.u('{}{}{}').format(num, whitespace_phrase, phrase)
    # Need to specify a direction, otherwise return naked number
    else:
        return safe_decode(num)
def phrase(cls, language, country=None):
    """Select an intersection phrase from the address config.

    Reads the alternatives stored under ``'cross_streets.intersection'``
    and draws one with ``weighted_choice``.

    :param language: language for the config lookup
    :param country: optional country for the config lookup
    :returns: the selected phrase, or ``None`` if there are no alternatives
    """
    options, distribution = address_config.alternative_probabilities(
        'cross_streets.intersection', language, country=country)

    if not options:
        return None

    selection = weighted_choice(options, distribution)
    intersection_phrase, _ = selection
    return intersection_phrase