def make_provider_safe(provider):
    """
    Turn a candidate provider part into one usable inside an NTIID string.

    The value is converted to text, every match of the module's
    illegal-character pattern is replaced with an underscore, and the
    result is then passed through :func:`escape_provider`.

    .. caution:: This is not a reversible transformation.

    :param provider: The candidate provider name.
    :return: The sanitized provider.
    """
    as_text = text_(provider)
    # Blank out anything the NTIID grammar forbids before escaping.
    scrubbed = re.sub(_illegal_chars_pattern, u'_', as_text)
    return escape_provider(scrubbed)
def escape_provider(provider):
    """
    Escape a provider name for use in an NTIID.

    The provider is converted to text and each space (``' '``) and
    hyphen (``-``) is replaced by an underscore. When comparing provider
    names with those that come from an NTIID, you should always call
    this function first so both sides are in the escaped form.

    :param provider: The provider name to escape.
    :return: The escaped provider.
    """
    escaped = text_(provider)
    # Same order as always: spaces first, then hyphens.
    for unsafe_char in (u' ', u'-'):
        escaped = escaped.replace(unsafe_char, u'_')
    return escaped
def make_specific_safe(specific, strict=True):
    """
    Turn a candidate specific part into one usable inside an NTIID string.

    Disallowed characters are substituted via a translation table, and
    non-bytes input is first squeezed through an ASCII encode that drops
    anything it cannot represent.

    .. caution:: This is not a reversible transformation.

    :keyword bool strict: If true (the default), the strict translation
        table is used, substituting a maximally compatible set of
        characters so that the result is most likely to be parsed even
        by borderline parsers (such as those that are regex based). If
        false, the lax table is used, replacing only the minimal set of
        characters prohibited by the conforming parser implementation
        of this module; typically only do this for backwards
        compatibility reasons.
    :return: The sanitized specific part, as text.
    :raises InvalidNTIIDError: Raised as
        ``ImpossibleToMakeSpecificPartSafe`` (presumably a subclass --
        confirm) when the result would be zero-length or would consist
        entirely of the replacement character; at least one character
        originally supplied must be valid.
    """
    # We are in ASCII-land here, so the easy way to strip all high
    # characters is to encode and ignore whatever does not fit.
    # Input that is already bytes is left untouched.
    raw = specific if isinstance(specific, bytes) else bytes_(specific, 'ascii', 'ignore')

    # Back to text for the translation step.
    candidate = text_(raw)

    if strict:
        table = _sp_strict_transtable
    else:
        table = _sp_lax_transtable
    safe = translate(candidate, table)

    # Refuse an empty result ...
    if not safe:
        raise ImpossibleToMakeSpecificPartSafe(safe)
    # ... and one made up of nothing but replacement characters.
    if set(safe) == set(_sp_repl_byte):
        raise ImpossibleToMakeSpecificPartSafe(safe)

    # Make sure the caller gets text back after translate.
    return text_(safe)
def test_bytes(self):
    """
    ``bytes_`` and ``text_`` convert U+2019 to and from its UTF-8 bytes.
    """
    curly_quote = u'\u2019'
    utf8_encoded = b'\xe2\x80\x99'

    assert_that(bytes_(curly_quote), is_(utf8_encoded))
    assert_that(text_(utf8_encoded), is_(curly_quote))
def make_ntiid(date=DATE, provider=None, nttype=None, specific=None, base=None):
    """
    Build a new NTIID string.

    :param number date: A value from :meth:`time.time`. A number that is
        not greater than zero, or any other non-string value such as
        `None`, means "now". If a string, it is used verbatim and
        should be a portion of an ISO format date, e.g., 2011-10. If
        left at the module default and a valid *base* is given, the
        date of *base* is used.
    :param str provider: Optional provider name. It is stripped of
        non-ASCII characters and escaped for our format.
    :param str nttype: Required NTIID type (if no base is given).
    :param str specific: Optional type-specific part.
    :param str base: If given, an NTIID string from which provider,
        nttype, specific, and date will be taken if they are not
        directly specified. If not a valid NTIID, it is ignored.
    :return: A new NTIID string, as returned by
        ``validate_ntiid_string``.
    :raises ValueError: If neither *nttype* nor a valid *base* is
        supplied, or if no date string could be derived.
    """
    # TODO: Simplify
    # pylint:disable=too-many-branches
    if not is_valid_ntiid_string(base):
        base = None

    if not (nttype or base):
        raise ValueError('Must supply type')

    if date is DATE and base is not None:
        # Caller did not override the date; inherit it from the base.
        date_string = get_parts(base).date
    elif isinstance(date, string_types):
        date_string = date
    else:
        # Account for 0/None: anything that is not a positive number
        # means "now".
        has_timestamp = isinstance(date, numbers.Real) and date > 0
        date_seconds = date if has_timestamp else time.time()
        # Always get the date in UTC/GMT by converting the epoch into a
        # GMT tuple, then go through a date object since that is the
        # easiest way to get ISO format.
        year, month, day = time.gmtime(date_seconds)[0:3]
        date_string = datetime.date(year, month, day).isoformat()

    if not date_string:
        __traceback_info__ = date, base
        raise ValueError("Unable to derive date string")

    base_parts = get_parts(base)

    # This is not a reversible transformation. Who should do this?
    if not provider:
        # Fall back to whatever the base supplied, if anything.
        inherited_provider = base_parts.provider
        provider_segment = inherited_provider + u'-' if inherited_provider else u''
    else:
        if not isinstance(provider, _strings_or_bytes):
            provider = text_type(provider)

        if isinstance(provider, bytes):
            # May have high-bytes; strip them.
            provider = provider.decode('ascii', 'ignore')
        elif isinstance(provider, text_type):
            # Strip high bytes
            provider = provider.encode('ascii', 'ignore').decode('ascii')

        provider_segment = escape_provider(provider) + u'-'

    if specific:
        specific_segment = u'-' + text_(specific)
    elif base_parts.specific:
        specific_segment = u'-' + base_parts.specific
    else:
        specific_segment = u''

    nttype = nttype or base_parts.nttype

    # Kept under this exact name so traceback formatters can show the parts.
    __traceback_info__ = (date_string, provider_segment, nttype, specific_segment)
    result = u'tag:nextthought.com,%s:%s%s%s' % __traceback_info__
    return validate_ntiid_string(result)