Пример #1
0
def DISABLED_test_non_bmp2():
    r'''Surrogate-pair input "\ud800\udc00\udbff\udffd" versus non-BMP output.

    The surrogate sequence is expected to percent-encode to the UTF-8 escapes
    of U+10000 and U+10FFFD, and those escapes are expected to decode to the
    two non-BMP code points themselves.
    '''
    surrogate_text = '\ud800\udc00\udbff\udffd'
    astral_text = '\U00010000\U0010FFFD'
    utf8_escapes = '%F0%90%80%80%F4%8F%BF%BD'
    # Both failure messages report the surrogate input
    assert utf8_escapes == iri.percent_encode(surrogate_text), surrogate_text
    assert astral_text == iri.percent_decode(utf8_escapes), surrogate_text
Пример #2
0
def DISABLED_test_non_bmp3():
    '''Round-trip code points 0-255 through several ISO-8859 encodings.

    Parts 1, 2, 3 and 15 are tried in turn; a part whose codec cannot be
    looked up is skipped with a warning rather than failing the test.
    '''
    all_chars = ''.join(chr(code) for code in range(256))
    expected = (
        '%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F'
        '%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F'
        '%20%21%22%23%24%25%26%27%28%29%2A%2B%2C-.%2F'
        '0123456789%3A%3B%3C%3D%3E%3F%40'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60'
        'abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'
        '%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F'
        '%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F'
        '%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF'
        '%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF'
        '%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF'
        '%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF'
        '%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF'
        '%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF'
    )
    for part in (1, 2, 3, 15):
        enc_name = 'iso-8859-%d' % part
        try:
            # Only probing for availability; the codec info itself is unused
            codecs.lookup(enc_name)
        except LookupError:
            warnings.warn('Not supported on this platform')
        else:
            assert expected == iri.percent_encode(all_chars, encoding=enc_name), enc_name
            assert all_chars == iri.percent_decode(expected, encoding=enc_name), enc_name
Пример #3
0
def DISABLED_test_percent_encode_template(unencoded, encoded):
    '''Check that a pair of strings round-trips through percent encoding.

    :param unencoded: text expected to percent-encode to ``encoded``
    :param encoded: percent-encoded text expected to decode to ``unencoded``

    On failure the assertion message is a short title derived from
    ``unencoded``: the text itself, or its first 10 characters followed by
    an ellipsis when it is longer than that.
    '''
    # Keep failure output readable for long inputs
    if len(unencoded) > 10:
        test_title = unencoded[:10] + '...'
    else:
        test_title = unencoded
    assert encoded == iri.percent_encode(unencoded), test_title
    assert unencoded == iri.percent_decode(encoded), test_title
Пример #4
0
def DISABLED_test_non_bmp3():
    '''Percent-encode and decode code points 0-255 under ISO-8859 encodings.

    Exercises parts 1, 2, 3 and 15. When the codec for a part cannot be
    looked up, a warning is issued and that part is skipped.
    '''
    plain = ''.join(chr(point) for point in range(256))
    escaped = (
        '%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F'
        '%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F'
        '%20%21%22%23%24%25%26%27%28%29%2A%2B%2C-.%2F'
        '0123456789%3A%3B%3C%3D%3E%3F%40'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60'
        'abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'
        '%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F'
        '%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F'
        '%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF'
        '%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF'
        '%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF'
        '%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF'
        '%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF'
        '%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF'
    )
    for part in (1, 2, 3, 15):
        enc_name = 'iso-8859-%d' % part
        try:
            codecs.lookup(enc_name)
        except LookupError:
            # Codec unavailable: note it and move on to the next part
            warnings.warn('Not supported on this platform')
        else:
            assert escaped == iri.percent_encode(
                plain, encoding=enc_name), enc_name
            assert plain == iri.percent_decode(
                escaped, encoding=enc_name), enc_name
Пример #5
0
    def _toiri(ctx):
        '''
        Convert the action argument into a list of IRI references

        :param ctx: context passed to the argument when it is a pipeline
            action; its extras['logger'] receives conversion warnings
        :return: list with one entry per input value; values which could not
            be converted (or which were deliberately skipped) are kept as-is
        '''
        values = arg(ctx) if is_pipeline_action(arg) else arg
        if not isinstance(values, list):
            values = [values]
        converted = []
        for item in values:
            # With ignore_refs set, non-absolute values pass through untouched
            if ignore_refs and not iri.is_absolute(item):
                converted.append(item)
                continue
            # Coerce into an IRI reference, falling back to the untyped value
            try:
                item = I(item)
            except ValueError:
                # Second chance: percent-encode, then retry the coercion
                try:
                    item = I(iri.percent_encode(item))
                except ValueError as err:
                    ctx.extras['logger'].warn(
                        'Unable to convert "{}" to IRI reference:\n{}'.format(item, err))

            # Only successfully coerced values are resolved against base
            if base is not None and isinstance(item, I):
                item = I(iri.absolutize(item, base))
            converted.append(item)

        return converted
Пример #6
0
def DISABLED_test_non_bmp2():
    r'''Non-BMP characters supplied as surrogates: "\ud800\udc00\udbff\udffd".

    Encoding the surrogate sequence should yield the UTF-8 escapes of
    U+10000 and U+10FFFD; decoding those escapes should yield the non-BMP
    code points rather than surrogates.
    '''
    as_surrogates = '\ud800\udc00\udbff\udffd'
    as_code_points = '\U00010000\U0010FFFD'
    percent_form = '%F0%90%80%80%F4%8F%BF%BD'
    # The surrogate input doubles as the failure message for both checks
    assert percent_form == iri.percent_encode(as_surrogates), as_surrogates
    assert as_code_points == iri.percent_decode(percent_form), as_surrogates
Пример #7
0
def DISABLED_test_percent_encode_template(unencoded, encoded):
    '''Check that a pair of strings round-trips through percent encoding.

    :param unencoded: text expected to percent-encode to ``encoded``
    :param encoded: percent-encoded text expected to decode to ``unencoded``

    On failure the assertion message is a short title derived from
    ``unencoded``: the text itself, or its first 10 characters followed by
    an ellipsis when it is longer than that.
    '''
    # Keep failure output readable for long inputs
    if len(unencoded) > 10:
        test_title = unencoded[:10] + '...'
    else:
        test_title = unencoded
    assert encoded == iri.percent_encode(unencoded), test_title
    assert unencoded == iri.percent_decode(encoded), test_title
Пример #8
0
    def _relator_property(ctx):
        '''
        Versa action function utility computing a list of relationships

        :param ctx: Versa context used in processing; passed to text_in when
            that is callable
        :return: List with one relationship per source text; falsy source
            texts yield an empty string
        '''
        labels = text_in(ctx) if callable(text_in) else text_in
        if not isinstance(labels, list):
            labels = [labels]
        relationships = []
        for label in labels:
            if not label:
                relationships.append('')
                continue
            # Take into account RDA-isms such as $iContainer of (expression)
            # by stripping the parens https://foundry.zepheira.com/topics/380
            stripped = RDA_PARENS_PAT.sub('', label)
            relationships.append(
                (prefix or '') + iri.percent_encode(slugify(stripped, False)))
        return relationships
Пример #9
0
    def _relator_property(ctx):
        '''
        Versa action function utility computing a list of relationships

        :param ctx: Versa context used in processing; passed to text_in when
            that is callable
        :return: List with one relationship per source text. Falsy source
            texts yield an empty string, and any computed value not found in
            allowed (when allowed is given) is replaced by default
        '''
        labels = text_in(ctx) if callable(text_in) else text_in
        lead = prefix or ''
        if not isinstance(labels, list):
            labels = [labels]

        computed = []
        for label in labels:
            if not label:
                computed.append('')
                continue
            # Take into account RDA-isms such as $iContainer of (expression)
            # by stripping the parens https://foundry.zepheira.com/topics/380
            stripped = RDA_PARENS_PAT.sub('', label)
            computed.append(lead + iri.percent_encode(slugify(stripped, False)))

        # Second pass: swap out anything outside the allowed set, if one is given
        return [
            default if (allowed is not None and prop not in allowed) else prop
            for prop in computed
        ]
Пример #10
0
    def _res(ctx):
        '''
        Convert the action argument into a list of IRI references

        :param ctx: context passed to the argument when it is callable; its
            logger attribute is called with a message for each failure
        :return: list of IRI references; values which cannot be converted,
            even after percent-encoding, are omitted
        '''
        candidates = arg(ctx) if callable(arg) else arg
        if not isinstance(candidates, list):
            candidates = [candidates]
        resources = []
        for raw in candidates:
            try:
                ref = I(raw)
            except ValueError:
                # Attempt to recover by percent encoding
                try:
                    ref = I(iri.percent_encode(raw))
                except ValueError as err:
                    ctx.logger('Unable to convert "{}" to IRI reference:\n{}'.format(raw, err))
                    continue

            # Resolve non-absolute references when a base is available
            if ref and not iri.is_absolute(ref) and base is not None:
                ref = I(iri.absolutize(ref, base))

            resources.append(ref)

        return resources
Пример #11
0
    def _res(ctx):
        '''
        Convert the action argument into a list of IRI references

        :param ctx: context passed to the argument when it is callable; its
            extras['logger'] receives conversion warnings
        :return: list with one entry per input value; values which could not
            be converted (or which were deliberately skipped) are kept as-is
        '''
        candidates = arg(ctx) if callable(arg) else arg
        if not isinstance(candidates, list):
            candidates = [candidates]
        results = []
        for value in candidates:
            # With ignore_refs set, non-absolute values pass through untouched
            if ignore_refs and not iri.is_absolute(value):
                results.append(value)
                continue
            # Coerce into an IRI reference, falling back to the untyped value
            try:
                value = I(value)
            except ValueError:
                # Second chance: percent-encode, then retry the coercion
                try:
                    value = I(iri.percent_encode(value))
                except ValueError as err:
                    ctx.extras['logger'].warn('Unable to convert "{}" to IRI reference:\n{}'.format(value, err))

            # Only successfully coerced values are resolved against base
            if base is not None and isinstance(value, I):
                value = I(iri.absolutize(value, base))
            results.append(value)

        return results
Пример #12
0
def parse_iter(csvfp, template_obj, model_fact=newmodel,
                csv_fact=None, prerow=None, header_loc=None, nosy=None):
    '''
    Parse CSV file into Versa model based on template for interpreting the data
    Yield a new model representing each row

    csvfp - file-like object with CSV content
    template_obj - string format template that serves as Versa literate
            template for each row, or callable that takes the dict of each
            row's data and returns a Versa literate string. e.g. of the latter
            might be a function that uses Jinja or Mako for more sophisticated
            templating
    model_fact - callable that provides a Versa model to receive the model
            interpreted from the Versa literate of each row
    csv_fact - callable that converts data from csvfp into Python csv
            module-compatible objects. Defaults to csv.DictReader with comma
            delimiter and minimal quoting
    prerow - callable to preprocess row mapping from CSV. If it is a generator
            function, every mapping it yields is processed as its own row
    header_loc - how many rows down in the CSV file header data can be found
            (NOTE: accepted for interface compatibility; not currently used
            by this function)
    nosy - optional function which is called with the result of each row's
            Versa literate output, useful for debugging

    Each row also gets an alias for every column: the column name with spaces
    replaced by underscores and then percent-encoded, so that it can be used
    as a format field name. Aliases are computed once, from the first row,
    and made unique by appending underscores.
    '''
    def process_rows(rows):
        '''
        Handle a list of rows (a list of 1 unless prerow is a generator)
        '''
        for row in rows:
            if isinstance(template_obj, str):
                vliterate_text = template_obj.format(**row)
            else:
                vliterate_text = template_obj(row)
            if nosy:
                nosy(vliterate_text)
            model = model_fact()
            markdown_parse(vliterate_text, model)
            yield model

    if csv_fact is None:
        rows = csv.DictReader(csvfp, delimiter=',',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL)
    else:
        rows = csv_fact(csvfp)

    # Mapping from original column name to its escaped alias; built lazily
    # from the first row and reused for all later rows
    adapted_keys = None
    for row in rows:
        if adapted_keys is None:
            adapted_keys = {}
            taken = set()
            for k in row.keys():
                # URI escape, but treat spaces as special case, for convenience
                adapted = iri.percent_encode(k.replace(' ', '_'))
                # Ensure there are no clashes after escaping: compare against
                # the aliases already handed out and extend the alias itself
                while adapted in taken:
                    adapted += '_'
                taken.add(adapted)
                adapted_keys[k] = adapted

        # Read all source values before writing any alias, so an alias which
        # happens to equal another original column name cannot leak its
        # value into that column's own alias
        aliased = [(ad_k, row[k]) for k, ad_k in adapted_keys.items()]
        for ad_k, value in aliased:
            row[ad_k] = value

        if inspect.isgeneratorfunction(prerow):
            yield from process_rows(prerow(row))
        elif prerow:
            yield from process_rows([prerow(row)])
        else:
            yield from process_rows([row])
Пример #13
0
def DISABLED_test_non_bmp1():
    r'''Round-trip the non-BMP characters "\U00010000\U0010FFFD".

    The expected percent-encoded form is the UTF-8 byte sequence of the two
    code points, written as percent escapes.
    '''
    astral_text = '\U00010000\U0010FFFD'
    utf8_escapes = '%F0%90%80%80%F4%8F%BF%BD'
    # The raw text is the failure message for both directions
    assert utf8_escapes == iri.percent_encode(astral_text), astral_text
    assert astral_text == iri.percent_decode(utf8_escapes), astral_text
Пример #14
0
def DISABLED_test_non_bmp1():
    r'''Non-BMP characters "\U00010000\U0010FFFD" through encode and decode.

    Checks that encoding gives the percent-escaped UTF-8 bytes of both code
    points and that decoding those escapes gives the code points back.
    '''
    code_points = '\U00010000\U0010FFFD'
    percent_form = '%F0%90%80%80%F4%8F%BF%BD'
    # On failure either assertion reports the unencoded text
    assert percent_form == iri.percent_encode(code_points), code_points
    assert code_points == iri.percent_decode(percent_form), code_points