def entity_decode_hex(input, errors='strict'):
    """Decode hex HTML entity data (``&#xNN;``) in a string.

    Returns a codec-style ``(decoded_string, length_consumed)`` tuple.
    """
    if _is_unicode(input):
        # Fast path: nothing that could be an entity.
        if '&' not in input:
            return input, len(input)
        bits = _asciire.split(input)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            # BUG FIX: the original round-tripped each piece through a
            # nonexistent 'bin' codec (LookupError at runtime) and then
            # fell through to the byte-string path below, silently
            # discarding `res`.  Unquote and return the joined result,
            # mirroring the byte-string branch.
            append(entityunquote(str(bits[i])))
            append(bits[i + 1])
        return (''.join(res), len(input))
    preamble_regex = re.compile(r"&#x", flags=re.I)
    bits = preamble_regex.split(input)
    # fastpath
    if len(bits) == 1:
        return input, len(input)
    res = [bits[0]]
    append = res.append
    for item in bits[1:]:
        try:
            # _hextochr maps two hex digits to the decoded character;
            # item[2] is the terminating ';' so the tail starts at [3:].
            append(_hextochr[item[:2]])
            append(item[3:])
        except KeyError:
            # Unrecognized entity: re-emit the original text verbatim.
            append('&#x')
            append(item)
            append(';')
    return (''.join(res), len(input))
def ascii85_encode(input, errors='strict'):
    # Ascii85-encode `input` (Adobe variant per the comment below: no
    # <~ ~> framing here; all-zero 4-byte groups collapse to "z").
    # Returns (encoded_string, length) in codec style.
    assert not input.endswith('\0'), "Trailing nulls unsupported"
    if _is_unicode(input):
        # convert from multibyte to codepoint in a horrible way. Good
        # luck debugging the stupid bugs here fuckers.
        o, l = bin_encode(input)
        input, l = bin_decode(o)
    #encoding is adobe not btoa
    bs = 4  # Ascii85 consumes 4-byte groups.
    # Pad to a multiple of bs; the `or bs` makes an exact multiple pad 0.
    padding = bs - ((len(input) % bs) or bs)
    input += '\0' * padding
    output = ""
    for block in blocks(input, bs):
        # NOTE(review): `blocks` and `unpack` come from elsewhere in this
        # module; presumably a fixed-size chunker and struct.unpack —
        # confirm.
        start = unpack(">I", block)[0]
        if not start:
            # All-zero group abbreviates to a single "z".
            # NOTE(review): if the FINAL (padded) group is all zero, the
            # tail truncation below strips from a 1-char "z" instead of a
            # 5-char group — confirm inputs cannot hit that case.
            output += "z"
            continue
        # Convert the 32-bit group to five base-85 digits, offset by 33.
        quot, rem = divmod(start, 85)
        chr_block = chr(rem + 33)
        for i in xrange(bs):
            quot, rem = divmod(quot, 85)
            chr_block += chr(rem + 33)
        # Digits were produced least-significant first; emit reversed.
        output += ''.join(reversed(chr_block))
    if padding:
        # Drop the output characters contributed by the zero padding.
        output = output[:-padding]
    # NOTE(review): len(input) here includes the padding appended above,
    # so the reported consumed length can exceed the caller's input
    # length — confirm callers don't rely on it.
    return output, len(input)
def entity_decode_hex(input, errors='strict'):
    """Decode hex HTML entity data (``&#xNN;``) in a string.

    Returns a codec-style ``(decoded_string, length_consumed)`` tuple on
    every path.
    """
    if _is_unicode(input):
        # Fast path: no percent-escapes to expand.
        if '%' not in input:
            # BUG FIX: the original did `return s` — a NameError, since
            # `s` is never defined — and would have returned a bare
            # string instead of the (value, length) tuple used by every
            # other return path.
            return input, len(input)
        bits = _asciire.split(input)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(unquote(str(bits[i])).decode('latin1'))
            append(bits[i + 1])
        return (''.join(res), len(input))
    preamble_regex = re.compile(r"&#x", flags=re.I)
    bits = preamble_regex.split(input)
    # fastpath
    if len(bits) == 1:
        # BUG FIX: return the (value, length) tuple for consistency with
        # the other return paths.
        return input, len(input)
    res = [bits[0]]
    append = res.append
    for item in bits[1:]:
        try:
            # _hextochr maps two hex digits to the decoded character;
            # item[2] is the terminating ';' so the tail starts at [3:].
            append(_hextochr[item[:2]])
            append(item[3:])
        except KeyError:
            # Unrecognized entity: re-emit the original text verbatim.
            append('&#x')
            append(item)
            append(';')
    return (''.join(res), len(input))
def _foursquare_urlencode(query, doseq=0, safe_chars="&/,+"): """Gnarly hack because Foursquare doesn't properly handle standard url encoding""" # Original doc: http://docs.python.org/2/library/urllib.html#urllib.urlencode # Works the same way as urllib.urlencode except two differences - # 1. it uses `quote()` instead of `quote_plus()` # 2. it takes an extra parameter called `safe_chars` which is a string # having the characters which should not be encoded. # # Courtesy of github.com/iambibhas if hasattr(query,"items"): # mapping objects query = query.items() else: # it's a bother at times that strings and string-like objects are # sequences... try: # non-sequence items should not work with len() # non-empty strings will fail this if len(query) and not isinstance(query[0], tuple): raise TypeError # zero-length sequences of all types will get here and succeed, # but that's a minor nit - since the original implementation # allowed empty dicts that type of behavior probably should be # preserved for consistency except TypeError: ty,va,tb = sys.exc_info() raise TypeError, "not a valid non-string sequence or mapping object", tb l = [] if not doseq: # preserve old behavior for k, v in query: k = urllib.quote(str(k), safe=safe_chars) v = urllib.quote(str(v), safe=safe_chars) l.append(k + '=' + v) else: for k, v in query: k = urllib.quote(str(k), safe=safe_chars) if isinstance(v, str): v = urllib.quote(v, safe=safe_chars) l.append(k + '=' + v) elif urllib._is_unicode(v): # is there a reasonable way to convert to ASCII? # encode generates a string, but "replace" or "ignore" # lose information and "strict" can raise UnicodeError v = urllib.quote(v.encode("ASCII","replace"), safe=safe_chars) l.append(k + '=' + v) else: try: # is this a sufficient test for sequence-ness? 
len(v) except TypeError: # not a sequence v = urllib.quote(str(v), safe=safe_chars) l.append(k + '=' + v) else: # loop over the sequence for elt in v: l.append(k + '=' + urllib.quote(str(elt))) return '&'.join(l)
def urlencode(query,doseq=0): """ Hack of urllib's urlencode function, which can handle utf-8, but for unknown reasons, chooses not to by trying to encode everything as ascii """ if hasattr(query,"items"): # mapping objects query = query.items() else: # it's a bother at times that strings and string-like objects are # sequences... try: # non-sequence items should not work with len() # non-empty strings will fail this if len(query) and not isinstance(query[0], tuple): raise TypeError # zero-length sequences of all types will get here and succeed, # but that's a minor nit - since the original implementation # allowed empty dicts that type of behavior probably should be # preserved for consistency except TypeError: ty,va,tb = sys.exc_info() raise TypeError, "not a valid non-string sequence or mapping object", tb l = [] if not doseq: # preserve old behavior for k, v in query: k = quote_plus(str(k)) v = quote_plus(str(v)) l.append(k + '=' + v) else: for k, v in query: k = quote_plus(str(k)) if isinstance(v, str): v = quote_plus(v) l.append(k + '=' + v) elif _is_unicode(v): # is there a reasonable way to convert to ASCII? # encode generates a string, but "replace" or "ignore" # lose information and "strict" can raise UnicodeError v = quote_plus(v.encode("utf8","replace")) l.append(k + '=' + v) else: try: # is this a sufficient test for sequence-ness? x = len(v) except TypeError: # not a sequence v = quote_plus(str(v)) l.append(k + '=' + v) else: # loop over the sequence for elt in v: l.append(k + '=' + quote_plus(str(elt))) return '&'.join(l)
def urlencode(query, doseq=0):
    """
    Hack of urllib's urlencode function, which can handle
    utf-8, but for unknown reasons, chooses not to by
    trying to encode everything as ascii
    """
    # NOTE(review): a byte-identical twin of this function appears
    # elsewhere in this file; whichever definition runs last wins at
    # import time — confirm one of them can be deleted.
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb
    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                # (here the value is encoded as utf-8, not ASCII)
                v = quote_plus(v.encode("utf8", "replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence: one key=value per element
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
def urlencode(query, doseq=0, safe='/<>"\'=:()'): ''' This is my version of urllib.urlencode , that adds "/" as a safe character and also adds support for "repeated parameter names". Note: This function is EXPERIMENTAL and should be used with care ;) Maybe this is the place to fix this bug: http://sourceforge.net/tracker2/?func=detail&aid=2675634&group_id=170274&atid=853652 Original documentation: Encode a sequence of two-element tuples or dictionary into a URL query string. If any values in the query arg are sequences and doseq is true, each sequence element is converted to a separate parameter. If the query arg is a sequence of two-element tuples, the order of the parameters in the output will match the order of parameters in the input. ''' if hasattr(query,"items"): # mapping objects query = query.items() else: # it's a bother at times that strings and string-like objects are # sequences... try: # non-sequence items should not work with len() # non-empty strings will fail this if len(query) and not isinstance(query[0], tuple): raise TypeError # zero-length sequences of all types will get here and succeed, # but that's a minor nit - since the original implementation # allowed empty dicts that type of behavior probably should be # preserved for consistency except TypeError: ty,va,tb = sys.exc_info() raise TypeError, "not a valid non-string sequence or mapping object", tb l = [] if not doseq: # preserve old behavior for k, v in query: # keys are easy k = urllib.quote_plus(str(k), safe) # Check for [] in the value if isinstance(v, list): for v_item in v: v_item = urllib.quote_plus(str(v_item), safe) l.append(k + '=' + v_item) else: v = urllib.quote_plus(str(v), safe) l.append(k + '=' + v) else: for k, v in query: # keys are easy... k = urllib.quote_plus(str(k), safe) # now the value... # is string if isinstance(v, str): v = urllib.quote_plus(v, safe) l.append(k + '=' + v) # is unicode... elif urllib._is_unicode(v): # is there a reasonable way to convert to ASCII? 
# encode generates a string, but "replace" or "ignore" # lose information and "strict" can raise UnicodeError v = urllib.quote_plus(v.encode("ASCII","replace"), safe) l.append(k + '=' + v) else: try: # is this a sufficient test for sequence-ness? x = len(v) except TypeError: # not a sequence v = urllib.quote_plus(str(v), safe) l.append(k + '=' + v) else: # loop over the sequence for elt in v: l.append(k + '=' + urllib.quote_plus(str(elt), safe)) return '&'.join(l)
def urlencode(query, doseq=0, safe='/<>"\'=:()'):
    '''
    This is my version of urllib.urlencode , that adds "/" as a safe
    character and also adds support for "repeated parameter names".

    Note: This function is EXPERIMENTAL and should be used with care ;)

    Maybe this is the place to fix this bug:
    http://sourceforge.net/tracker2/?func=detail&aid=2675634&group_id=170274&atid=853652

    Original documentation:
    Encode a sequence of two-element tuples or dictionary into a URL
    query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of
    the parameters in the output will match the order of parameters in
    the input.

    >>> import cgi
    >>> urlencode( cgi.parse_qs('a=1&a=c') )
    'a=1&a=c'
    >>> urlencode( cgi.parse_qs('a=1&b=c') )
    'a=1&b=c'
    >>> urlencode( cgi.parse_qs('a=á&a=2') )
    'a=%C3%A1&a=2'
    >>> urlencode( 'a=b&c=d' )
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    TypeError: not a valid non-string sequence or mapping object
    '''
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb
    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            # keys are easy
            k = urllib.quote_plus(str(k), safe)
            # Check for [] in the value: a list expands into repeated
            # key=value entries even when doseq is false.
            if isinstance(v, list):
                for v_item in v:
                    v_item = urllib.quote_plus(str(v_item), safe)
                    l.append(k + '=' + v_item)
            else:
                v = urllib.quote_plus(str(v), safe)
                l.append(k + '=' + v)
    else:
        for k, v in query:
            # keys are easy...
            k = urllib.quote_plus(str(k), safe)
            # now the value...
            # is string
            if isinstance(v, str):
                v = urllib.quote_plus(v, safe)
                l.append(k + '=' + v)
            # is unicode...
            elif urllib._is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = urllib.quote_plus(v.encode("ASCII", "replace"), safe)
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = urllib.quote_plus(str(v), safe)
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence: one key=value per element
                    for elt in v:
                        l.append(k + '=' + urllib.quote_plus(str(elt), safe))
    return '&'.join(l)
def ebs_urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    This is different from the Python version in urllib as it uses quote
    instead of quote_plus for compatibility with the EBS payment gateway.
    """
    # NOTE(review): the docstring above says this variant uses quote()
    # instead of quote_plus(), but every call below is quote_plus() —
    # either the docstring or the implementation is wrong; confirm what
    # the EBS gateway actually requires before changing either.
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb
    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII", "replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence: one key=value per element
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
def urlencode(query): """Encode a sequence of two-element tuples or dictionary into a URL query string. This version is adapted from the standard library to understand operators in the pyesgf.search.constraints module. If the query arg is a sequence of two-element tuples, the order of the parameters in the output will match the order of parameters in the input. """ if hasattr(query,"items"): # mapping objects query = query.items() else: # it's a bother at times that strings and string-like objects are # sequences... try: # non-sequence items should not work with len() # non-empty strings will fail this if len(query) and not isinstance(query[0], tuple): raise TypeError # zero-length sequences of all types will get here and succeed, # but that's a minor nit - since the original implementation # allowed empty dicts that type of behavior probably should be # preserved for consistency except TypeError: ty,va,tb = sys.exc_info() raise TypeError, "not a valid non-string sequence or mapping object", tb def append(k, v, tag, l): from .search.consts import OPERATOR_NEQ if tag == OPERATOR_NEQ: l.append('%s!=%s' % (k, v)) elif tag is None: l.append('%s=%s' % (k, v)) else: raise ValueError('Unknown operator tag %s' % tag) def strip_tag(v): if type(v) == tuple: tag, v = v else: tag = None return tag, v l = [] for k, v in query: tag, v = strip_tag(v) k = quote_plus(str(k)) if isinstance(v, str): v = quote_plus(v) append(k, v, tag, l) elif _is_unicode(v): # is there a reasonable way to convert to ASCII? # encode generates a string, but "replace" or "ignore" # lose information and "strict" can raise UnicodeError v = quote_plus(v.encode("ASCII","replace")) append(k, v, tag, l) else: try: # is this a sufficient test for sequence-ness? len(v) except TypeError: # not a sequence v = quote_plus(str(v)) append(k, v, tag, l) else: # loop over the sequence for elt in v: append(k, quote_plus(str(elt)), tag, l) return '&'.join(l)