def edit_distance(s1, s2): """ FROM http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance# Python LICENCE http://creativecommons.org/licenses/by-sa/3.0/ """ if len(s1) < len(s2): return edit_distance(s2, s1) # len(s1) >= len(s2) if len(s2) == 0: return 1.0 previous_row = xrange(len(s2) + 1) for i, c1 in enumerate(s1): current_row = [i + 1] for j, c2 in enumerate(s2): insertions = previous_row[ j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer deletions = current_row[j] + 1 # than s2 substitutions = previous_row[j] + (c1 != c2) current_row.append(min(insertions, deletions, substitutions)) previous_row = current_row return previous_row[-1] / len(s1)
def _all_combos(self): """ RETURN AN ITERATOR OF ALL COORDINATES """ combos = _product(self.dims) if not combos: return calc = [(coalesce(_product(self.dims[i+1:]), 1), mm) for i, mm in enumerate(self.dims)] for c in xrange(combos): yield tuple(int(c / dd) % mm for dd, mm in calc)
def _all_combos(self): """ RETURN AN ITERATOR OF ALL COORDINATES """ combos = _product(self.dims) if not combos: return calc = [(coalesce(_product(self.dims[i+1:]), 1), mm) for i, mm in enumerate(self.dims)] for c in xrange(combos): yield tuple(int(c / dd) % mm for dd, mm in calc)
def edit_distance(s1, s2): """ FROM http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance# Python LICENCE http://creativecommons.org/licenses/by-sa/3.0/ """ if len(s1) < len(s2): return edit_distance(s2, s1) # len(s1) >= len(s2) if len(s2) == 0: return 1.0 previous_row = xrange(len(s2) + 1) for i, c1 in enumerate(s1): current_row = [i + 1] for j, c2 in enumerate(s2): insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer deletions = current_row[j] + 1 # than s2 substitutions = previous_row[j] + (c1 != c2) current_row.append(min(insertions, deletions, substitutions)) previous_row = current_row return previous_row[-1] / len(s1)
def _rate_limiter(self, please_stop): try: max_requests = self.requests.max recent_requests = [] while not please_stop: now = Date.now() too_old = now - self.amortization_period recent_requests = [t for t in recent_requests if t > too_old] num_recent = len(recent_requests) if num_recent >= max_requests: space_free_at = recent_requests[0] + self.amortization_period (please_stop | Till(till=space_free_at.unix)).wait() continue for _ in xrange(num_recent, max_requests): request = self.todo.pop() now = Date.now() recent_requests.append(now) self.requests.add(request) except Exception as e: Log.warning("failure", cause=e)
def _rate_limiter(self, please_stop): try: max_requests = self.requests.max recent_requests = [] while not please_stop: now = Date.now() too_old = now - self.amortization_period recent_requests = [t for t in recent_requests if t > too_old] num_recent = len(recent_requests) if num_recent >= max_requests: space_free_at = recent_requests[0] + self.amortization_period (please_stop | Till(till=space_free_at.unix)).wait() continue for _ in xrange(num_recent, max_requests): request = self.todo.pop() now = Date.now() recent_requests.append(now) self.requests.add(request) except Exception as e: Log.warning("failure", cause=e)
def pretty_json(value): try: if value is False: return "false" elif value is True: return "true" elif isinstance(value, Mapping): try: items = sort_using_key(list(value.items()), lambda r: r[0]) values = [ encode_basestring(k) + PRETTY_COLON + indent(pretty_json(v)).strip() for k, v in items if v != None ] if not values: return "{}" elif len(values) == 1: return "{" + values[0] + "}" else: return "{\n" + INDENT + (",\n" + INDENT).join(values) + "\n}" except Exception as e: from mo_logs import Log from mo_math import OR if OR(not isinstance(k, text_type) for k in value.keys()): Log.error("JSON must have string keys: {{keys}}:", keys=[k for k in value.keys()], cause=e) Log.error("problem making dict pretty: keys={{keys}}:", keys=[k for k in value.keys()], cause=e) elif value in (None, Null): return "null" elif isinstance(value, (text_type, binary_type)): if isinstance(value, binary_type): value = utf82unicode(value) try: return quote(value) except Exception as e: from mo_logs import Log try: Log.note( "try explicit convert of string with length {{length}}", length=len(value)) acc = [QUOTE] for c in value: try: try: c2 = ESCAPE_DCT[c] except Exception: c2 = c c3 = text_type(c2) acc.append(c3) except BaseException: pass # Log.warning("odd character {{ord}} found in string. Ignored.", ord= ord(c)}, cause=g) acc.append(QUOTE) output = u"".join(acc) Log.note("return value of length {{length}}", length=len(output)) return output except BaseException as f: Log.warning("can not even explicit convert {{type}}", type=f.__class__.__name__, cause=f) return "null" elif isinstance(value, list): if not value: return "[]" if ARRAY_MAX_COLUMNS == 1: return "[\n" + ",\n".join( [indent(pretty_json(v)) for v in value]) + "\n]" if len(value) == 1: j = pretty_json(value[0]) if j.find("\n") >= 0: return "[\n" + indent(j) + "\n]" else: return "[" + j + "]" js = [pretty_json(v) for v in value] max_len = max(*[len(j) for j in js]) if max_len <= ARRAY_ITEM_MAX_LENGTH and max( *[j.find("\n") for j in js]) == -1: # ALL TINY VALUES num_columns = max( 1, min( ARRAY_MAX_COLUMNS, int( floor((ARRAY_ROW_LENGTH + 2.0) / float(max_len + 2))))) # +2 TO COMPENSATE FOR COMMAS if len(js) <= num_columns: # DO NOT ADD \n IF ONLY ONE ROW return "[" + PRETTY_COMMA.join(js) + "]" if num_columns == 1: # DO NOT rjust IF THERE IS ONLY ONE COLUMN return "[\n" + ",\n".join( [indent(pretty_json(v)) for v in value]) + "\n]" content = ",\n".join( PRETTY_COMMA.join( j.rjust(max_len) for j in js[r:r + num_columns]) for r in xrange(0, len(js), num_columns)) return "[\n" + indent(content) + "\n]" pretty_list = js output = ["[\n"] for i, p in enumerate(pretty_list): try: if i > 0: output.append(",\n") output.append(indent(p)) except Exception: from mo_logs import Log Log.warning( "problem concatenating string of length {{len1}} and {{len2}}", len1=len("".join(output)), len2=len(p)) output.append("\n]") try: return "".join(output) except Exception as e: from mo_logs import Log Log.error("not expected", cause=e) elif hasattr(value, '__data__'): d = value.__data__() return pretty_json(d) elif hasattr(value, '__json__'): j = value.__json__() if j == None: return " null " # TODO: FIND OUT WHAT CAUSES THIS return pretty_json(json_decoder(j)) elif scrub(value) is None: return "null" elif hasattr(value, '__iter__'): return pretty_json(list(value)) elif hasattr(value, '__call__'): return "null" else: try: if int(value) == value: return text_type(int(value)) except Exception: pass try: if float(value) == value: return text_type(float(value)) except Exception: pass return pypy_json_encode(value) except Exception as e: problem_serializing(value, e)
def pretty_json(value): try: if value is False: return "false" elif value is True: return "true" elif is_data(value): try: items = sort_using_key(value.items(), lambda r: r[0]) values = [encode_basestring(k) + PRETTY_COLON + pretty_json(v) for k, v in items if v != None] if not values: return "{}" elif len(values) == 1: return "{" + values[0] + "}" else: return "{\n" + ",\n".join(indent(v) for v in values) + "\n}" except Exception as e: from mo_logs import Log from mo_math import OR if OR(not is_text(k) for k in value.keys()): Log.error( "JSON must have string keys: {{keys}}:", keys=[k for k in value.keys()], cause=e ) Log.error( "problem making dict pretty: keys={{keys}}:", keys=[k for k in value.keys()], cause=e ) elif value in (None, Null): return "null" elif value.__class__ in (binary_type, text_type): if is_binary(value): value = utf82unicode(value) try: return quote(value) except Exception as e: from mo_logs import Log try: Log.note("try explicit convert of string with length {{length}}", length=len(value)) acc = [QUOTE] for c in value: try: try: c2 = ESCAPE_DCT[c] except Exception: c2 = c c3 = text_type(c2) acc.append(c3) except BaseException: pass # Log.warning("odd character {{ord}} found in string. Ignored.", ord= ord(c)}, cause=g) acc.append(QUOTE) output = u"".join(acc) Log.note("return value of length {{length}}", length=len(output)) return output except BaseException as f: Log.warning("can not convert {{type}} to json", type=f.__class__.__name__, cause=f) return "null" elif is_list(value): if not value: return "[]" if ARRAY_MAX_COLUMNS == 1: return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]" if len(value) == 1: j = pretty_json(value[0]) if j.find("\n") >= 0: return "[\n" + indent(j) + "\n]" else: return "[" + j + "]" js = [pretty_json(v) for v in value] max_len = max(*[len(j) for j in js]) if max_len <= ARRAY_ITEM_MAX_LENGTH and max(*[j.find("\n") for j in js]) == -1: # ALL TINY VALUES num_columns = max(1, min(ARRAY_MAX_COLUMNS, int(floor((ARRAY_ROW_LENGTH + 2.0) / float(max_len + 2))))) # +2 TO COMPENSATE FOR COMMAS if len(js) <= num_columns: # DO NOT ADD \n IF ONLY ONE ROW return "[" + PRETTY_COMMA.join(js) + "]" if num_columns == 1: # DO NOT rjust IF THERE IS ONLY ONE COLUMN return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]" content = ",\n".join( PRETTY_COMMA.join(j.rjust(max_len) for j in js[r:r + num_columns]) for r in xrange(0, len(js), num_columns) ) return "[\n" + indent(content) + "\n]" pretty_list = js output = ["[\n"] for i, p in enumerate(pretty_list): try: if i > 0: output.append(",\n") output.append(indent(p)) except Exception: from mo_logs import Log Log.warning("problem concatenating string of length {{len1}} and {{len2}}", len1=len("".join(output)), len2=len(p) ) output.append("\n]") try: return "".join(output) except Exception as e: from mo_logs import Log Log.error("not expected", cause=e) elif hasattr(value, '__data__'): d = value.__data__() return pretty_json(d) elif hasattr(value, '__json__'): j = value.__json__() if j == None: return " null " # TODO: FIND OUT WHAT CAUSES THIS return pretty_json(json_decoder(j)) elif scrub(value) is None: return "null" elif hasattr(value, '__iter__'): return pretty_json(list(value)) elif hasattr(value, '__call__'): return "null" else: try: if int(value) == value: return text_type(int(value)) except Exception: pass try: if float(value) == value: return text_type(float(value)) except Exception: pass return pypy_json_encode(value) except Exception as e: problem_serializing(value, e)