def DISABLED_test_non_bmp2():
    '''
    Non-BMP characters supplied as UTF-16 surrogate pairs

    The input spells U+10000 and U+10FFFD as surrogate code units. The test
    expects percent-encoding to yield the UTF-8 escapes of the two
    supplementary-plane characters, and decoding those escapes to yield the
    characters themselves (not the surrogates).
    '''
    surrogate_text = '\ud800\udc00\udbff\udffd'
    expected_escaped = '%F0%90%80%80%F4%8F%BF%BD'
    expected_decoded = '\U00010000\U0010FFFD'

    # Encode direction: surrogate pairs -> UTF-8 percent escapes
    actual_escaped = iri.percent_encode(surrogate_text)
    assert expected_escaped == actual_escaped, surrogate_text

    # Decode direction: percent escapes -> genuine non-BMP characters
    actual_decoded = iri.percent_decode(expected_escaped)
    assert expected_decoded == actual_decoded, surrogate_text
def DISABLED_test_non_bmp3():
    '''
    Percent-encode/decode code points 0-255 under several ISO-8859 encodings

    Every code point from U+0000 to U+00FF is round-tripped through
    iri.percent_encode / iri.percent_decode using ISO-8859-1, -2, -3 and -15.
    An encoding whose codec cannot be looked up is skipped with a warning.
    '''
    all_chars = ''.join(chr(code_point) for code_point in range(256))
    expected_escaped = (
        '%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F'
        '%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F'
        '%20%21%22%23%24%25%26%27%28%29%2A%2B%2C-.%2F'
        '0123456789%3A%3B%3C%3D%3E%3F%40'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60'
        'abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'
        '%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F'
        '%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F'
        '%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF'
        '%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF'
        '%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF'
        '%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF'
        '%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF'
        '%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF'
    )
    for iso_part in (1, 2, 3, 15):
        codec_name = f'iso-8859-{iso_part}'
        # Skip (with a warning) any codec this Python build does not provide
        try:
            codecs.lookup(codec_name)
        except LookupError:
            warnings.warn('Not supported on this platform')
            continue
        escaped = iri.percent_encode(all_chars, encoding=codec_name)
        assert expected_escaped == escaped, codec_name
        decoded = iri.percent_decode(expected_escaped, encoding=codec_name)
        assert all_chars == decoded, codec_name
def DISABLED_test_percent_encode_template(unencoded, encoded):
    '''
    Template check that a string and its percent-encoded form round-trip

    :param unencoded: text expected to result from decoding ``encoded``
    :param encoded: percent-encoded text expected from encoding ``unencoded``

    A shortened form of ``unencoded`` is attached to each assertion so a
    failure identifies the offending case without dumping a long string.
    '''
    # Abbreviate long inputs for the failure message
    if len(unencoded) > 10:
        test_title = unencoded[:11] + '...'
    else:
        test_title = unencoded
    assert encoded == iri.percent_encode(unencoded), test_title
    assert unencoded == iri.percent_decode(encoded), test_title
def DISABLED_test_non_bmp3():
    '''
    Round-trip code points 0-255 with explicit ISO-8859 encodings

    Builds a string of every code point from 0 through 255 and checks that
    iri.percent_encode and iri.percent_decode agree with a fixed escaped form
    for ISO-8859 parts 1, 2, 3 and 15. Parts whose codec lookup fails are
    skipped after emitting a warning.
    '''
    code_points = range(256)
    plain = ''.join([chr(cp) for cp in code_points])
    escaped_rows = (
        '%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F',
        '%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F',
        '%20%21%22%23%24%25%26%27%28%29%2A%2B%2C-.%2F',
        '0123456789%3A%3B%3C%3D%3E%3F%40',
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60',
        'abcdefghijklmnopqrstuvwxyz%7B%7C%7D~',
        '%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F',
        '%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F',
        '%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF',
        '%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF',
        '%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF',
        '%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF',
        '%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF',
        '%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF',
    )
    escaped = ''.join(escaped_rows)

    for part_number in (1, 2, 3, 15):
        encoding_label = 'iso-8859-{}'.format(part_number)
        try:
            codecs.lookup(encoding_label)
        except LookupError:
            # Codec unavailable here: note it and move on to the next part
            warnings.warn('Not supported on this platform')
        else:
            assert escaped == iri.percent_encode(plain, encoding=encoding_label), encoding_label
            assert plain == iri.percent_decode(escaped, encoding=encoding_label), encoding_label
def _toiri(ctx):
    '''
    Versa action function: coerce the configured argument into IRI reference(s)

    The names arg, base and ignore_refs are not defined in this function
    (presumably bound by an enclosing factory - confirm against the caller).

    :param ctx: Versa context; passed to arg when arg is a pipeline action,
        and ctx.extras['logger'] receives a warning on conversion failure
    :return: list with one entry per input value - an I instance where
        conversion succeeded, otherwise the value unchanged
    '''
    values = arg(ctx) if is_pipeline_action(arg) else arg
    if not isinstance(values, list):
        values = [values]

    results = []
    for value in values:
        # With ignore_refs set, non-absolute values are passed through as-is
        if ignore_refs and not iri.is_absolute(value):
            results.append(value)
            continue

        # Coerce into an IRI ref, but fall out as untyped text otherwise
        try:
            value = I(value)
        except ValueError:
            # Attempt to recover by percent encoding
            try:
                value = I(iri.percent_encode(value))
            except ValueError as err:
                ctx.extras['logger'].warn(
                    'Unable to convert "{}" to IRI reference:\n{}'.format(value, err))

        # Resolve against the base only when conversion actually produced an I
        if base is not None and isinstance(value, I):
            value = I(iri.absolutize(value, base))
        results.append(value)
    return results
def _relator_property(ctx):
    '''
    Versa action function Utility to specify a list of relationships

    :param ctx: Versa context used in processing (e.g. includes the prototype link)
    :return: List of relationships computed from the source text; a falsy
        source text yields an empty string in the corresponding position
    '''
    source = text_in(ctx) if callable(text_in) else text_in
    texts = source if isinstance(source, list) else [source]

    relationships = []
    for text in texts:
        if not text:
            relationships.append('')
            continue
        # Take into account RDA-isms such as $iContainer of (expression) by
        # stripping the parens https://foundry.zepheira.com/topics/380
        stripped = RDA_PARENS_PAT.sub('', text)
        slug = slugify(stripped, False)
        relationships.append((prefix or '') + iri.percent_encode(slug))
    return relationships
def _relator_property(ctx):
    '''
    Versa action function Utility to specify a list of relationships

    Each computed relationship is kept only if allowed is None or contains
    it; otherwise default is substituted in that position.

    :param ctx: Versa context used in processing (e.g. includes the prototype link)
    :return: List of relationships computed from the source text
    '''
    source = text_in(ctx) if callable(text_in) else text_in
    lead = prefix or ''
    texts = source if isinstance(source, list) else [source]

    relationships = []
    for text in texts:
        if text:
            # Take into account RDA-isms such as $iContainer of (expression) by
            # stripping the parens https://foundry.zepheira.com/topics/380
            slug = slugify(RDA_PARENS_PAT.sub('', text), False)
            prop = lead + iri.percent_encode(slug)
        else:
            prop = ''
        # Anything outside the permitted set is replaced by the default
        if allowed is not None and prop not in allowed:
            prop = default
        relationships.append(prop)
    return relationships
def _res(ctx):
    '''
    Versa action function: convert the configured argument into IRI reference(s)

    The names arg and base are not defined in this function (presumably
    bound by an enclosing factory - confirm against the caller).

    :param ctx: Versa context; passed to arg when arg is callable, and
        ctx.logger is called with a message when a value cannot be converted
    :return: list of I instances; values that cannot be converted even after
        percent encoding are omitted
    '''
    items = arg(ctx) if callable(arg) else arg
    if not isinstance(items, list):
        items = [items]

    converted = []
    for raw in items:
        try:
            ref = I(raw)
        except ValueError:
            # Attempt to recover by percent encoding
            try:
                ref = I(iri.percent_encode(raw))
            except ValueError as err:
                ctx.logger('Unable to convert "{}" to IRI reference:\n{}'.format(raw, err))
                continue
        # Non-empty, non-absolute references are resolved against the base, if any
        needs_resolving = ref and not iri.is_absolute(ref) and base is not None
        if needs_resolving:
            ref = I(iri.absolutize(ref, base))
        converted.append(ref)
    return converted
def _res(ctx):
    '''
    Versa action function: render the configured argument as IRI reference(s)

    The names arg, base and ignore_refs are not defined in this function
    (presumably bound by an enclosing factory - confirm against the caller).

    :param ctx: Versa context; passed to arg when arg is callable, and
        ctx.extras['logger'] receives a warning on conversion failure
    :return: list with one entry per input value - an I instance where
        conversion succeeded, otherwise the value unchanged
    '''
    candidates = arg(ctx) if callable(arg) else arg
    if not isinstance(candidates, list):
        candidates = [candidates]

    output = []
    for item in candidates:
        skip_conversion = ignore_refs and not iri.is_absolute(item)
        if not skip_conversion:
            # Coerce into an IRI ref, but fall out as untyped text otherwise
            try:
                item = I(item)
            except ValueError:
                # Attempt to recover by percent encoding
                try:
                    item = I(iri.percent_encode(item))
                except ValueError as err:
                    message = 'Unable to convert "{}" to IRI reference:\n{}'.format(item, err)
                    ctx.extras['logger'].warn(message)
            # Only values that became an I are resolved against the base
            if base is not None and isinstance(item, I):
                item = I(iri.absolutize(item, base))
        output.append(item)
    return output
def parse_iter(csvfp, template_obj, model_fact=newmodel, csv_fact=None, prerow=None,
               header_loc=None, nosy=None):
    '''
    Parse CSV file into Versa model based on template for interpreting the data
    Yield a new model representing each row

    csvfp - file-like object with CSV content
    template_obj - string format template that serves as Versa literate template
        for each row, or callable that takes the dict of each row's data and
        returns a Versa literate string. e.g. of the latter might be a function
        that uses Jinja or Mako for more sophisticated templating
    model_fact - callable that provides a Versa model to receive the model
        interpreted from the Versa literate of each row
    csv_fact - callable that converts data from csvfp into Python csv
        module-compatible objects (an iterable of per-row mappings). Defaults
        to csv.DictReader with comma delimiter and double-quote quoting
    prerow - callable to preprocess row mapping from CSV. If it is a generator
        function, each mapping it yields is processed as a separate row
    header_loc - how many rows down in the CSV file header data can be found
        (accepted for interface compatibility; not consulted by this function)
    nosy - optional function which is called with the result of each row's
        Versa literate output, useful for debugging

    Every row additionally gains a copy of each column under an adapted name:
    the header with spaces replaced by underscores, then percent-encoded, with
    underscores appended as needed so that no two headers share an adapted
    name. The name mapping is computed once, from the first row.
    '''
    def process_rows(rows):
        '''
        Handle a list of rows (a list of 1 unless prerow is a generator)
        '''
        for row in rows:
            if isinstance(template_obj, str):
                vliterate_text = template_obj.format(**row)
            else:
                vliterate_text = template_obj(row)
            if nosy:
                nosy(vliterate_text)
            model = model_fact()
            markdown_parse(vliterate_text, model)
            yield model

    if csv_fact is None:
        rows = csv.DictReader(csvfp, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    else:
        rows = csv_fact(csvfp)

    # Loop-invariant: decide once how prerow output is to be consumed
    prerow_is_generator = inspect.isgeneratorfunction(prerow)

    adapted_keys = None  # header -> adapted name; built from the first row
    for row in rows:
        if adapted_keys is None:
            adapted_keys = {}
            taken = set()
            for k in row.keys():
                # URI escape, but treat spaces as special case, for convenience
                adapted = iri.percent_encode(k.replace(' ', '_'))
                # Ensure there are no clashes after escaping. The suffix goes
                # on the candidate name and is compared against names already
                # handed out.
                while adapted in taken:
                    adapted += '_'
                taken.add(adapted)
                adapted_keys[k] = adapted

        # Copy from a snapshot so that writing an adapted name which coincides
        # with another column's header cannot clobber that column before it
        # has been read
        original = dict(row)
        for k, ad_k in adapted_keys.items():
            row[ad_k] = original[k]

        if prerow_is_generator:
            yield from process_rows(prerow(row))
        elif prerow:
            yield from process_rows([prerow(row)])
        else:
            yield from process_rows([row])
def DISABLED_test_non_bmp1():
    '''
    Non-BMP characters U+10000 and U+10FFFD given directly

    Checks that the two supplementary-plane characters percent-encode to
    their UTF-8 escapes and that those escapes decode back to the same text.
    '''
    astral_text = '\U00010000\U0010FFFD'
    expected_escaped = '%F0%90%80%80%F4%8F%BF%BD'

    actual_escaped = iri.percent_encode(astral_text)
    assert expected_escaped == actual_escaped, astral_text

    actual_text = iri.percent_decode(expected_escaped)
    assert astral_text == actual_text, astral_text