Пример #1
0
def edit_distance(s1, s2):
    """
    FROM http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance# Python
    LICENCE http://creativecommons.org/licenses/by-sa/3.0/
    """
    if len(s1) < len(s2):
        return edit_distance(s2, s1)

    # len(s1) >= len(s2)
    if len(s2) == 0:
        return 1.0

    previous_row = xrange(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[
                j +
                1] + 1  # j+1 instead of j since previous_row and current_row are one character longer
            deletions = current_row[j] + 1  # than s2
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1] / len(s1)
Пример #2
0
    def _all_combos(self):
        """
        RETURN AN ITERATOR OF ALL COORDINATES
        """
        combos = _product(self.dims)
        if not combos:
            return

        calc = [(coalesce(_product(self.dims[i+1:]), 1), mm) for i, mm in enumerate(self.dims)]

        for c in xrange(combos):
            yield tuple(int(c / dd) % mm for dd, mm in calc)
Пример #3
0
    def _all_combos(self):
        """
        RETURN AN ITERATOR OF ALL COORDINATES
        """
        combos = _product(self.dims)
        if not combos:
            return

        calc = [(coalesce(_product(self.dims[i+1:]), 1), mm) for i, mm in enumerate(self.dims)]

        for c in xrange(combos):
            yield tuple(int(c / dd) % mm for dd, mm in calc)
Пример #4
0
def edit_distance(s1, s2):
    """
    FROM http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance# Python
    LICENCE http://creativecommons.org/licenses/by-sa/3.0/
    """
    if len(s1) < len(s2):
        return edit_distance(s2, s1)

    # len(s1) >= len(s2)
    if len(s2) == 0:
        return 1.0

    previous_row = xrange(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1  # j+1 instead of j since previous_row and current_row are one character longer
            deletions = current_row[j] + 1  # than s2
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1] / len(s1)
Пример #5
0
    def _rate_limiter(self, please_stop):
        try:
            max_requests = self.requests.max
            recent_requests = []

            while not please_stop:
                now = Date.now()
                too_old = now - self.amortization_period

                recent_requests = [t for t in recent_requests if t > too_old]

                num_recent = len(recent_requests)
                if num_recent >= max_requests:
                    space_free_at = recent_requests[0] + self.amortization_period
                    (please_stop | Till(till=space_free_at.unix)).wait()
                    continue
                for _ in xrange(num_recent, max_requests):
                    request = self.todo.pop()
                    now = Date.now()
                    recent_requests.append(now)
                    self.requests.add(request)
        except Exception as e:
            Log.warning("failure", cause=e)
Пример #6
0
    def _rate_limiter(self, please_stop):
        try:
            max_requests = self.requests.max
            recent_requests = []

            while not please_stop:
                now = Date.now()
                too_old = now - self.amortization_period

                recent_requests = [t for t in recent_requests if t > too_old]

                num_recent = len(recent_requests)
                if num_recent >= max_requests:
                    space_free_at = recent_requests[0] + self.amortization_period
                    (please_stop | Till(till=space_free_at.unix)).wait()
                    continue
                for _ in xrange(num_recent, max_requests):
                    request = self.todo.pop()
                    now = Date.now()
                    recent_requests.append(now)
                    self.requests.add(request)
        except Exception as e:
            Log.warning("failure", cause=e)
Пример #7
0
def pretty_json(value):
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif isinstance(value, Mapping):
            try:
                items = sort_using_key(list(value.items()), lambda r: r[0])
                values = [
                    encode_basestring(k) + PRETTY_COLON +
                    indent(pretty_json(v)).strip() for k, v in items
                    if v != None
                ]
                if not values:
                    return "{}"
                elif len(values) == 1:
                    return "{" + values[0] + "}"
                else:
                    return "{\n" + INDENT + (",\n" +
                                             INDENT).join(values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR

                if OR(not isinstance(k, text_type) for k in value.keys()):
                    Log.error("JSON must have string keys: {{keys}}:",
                              keys=[k for k in value.keys()],
                              cause=e)

                Log.error("problem making dict pretty: keys={{keys}}:",
                          keys=[k for k in value.keys()],
                          cause=e)
        elif value in (None, Null):
            return "null"
        elif isinstance(value, (text_type, binary_type)):
            if isinstance(value, binary_type):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log

                try:
                    Log.note(
                        "try explicit convert of string with length {{length}}",
                        length=len(value))
                    acc = [QUOTE]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = text_type(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.",  ord= ord(c)}, cause=g)
                    acc.append(QUOTE)
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}",
                             length=len(output))
                    return output
                except BaseException as f:
                    Log.warning("can not even explicit convert {{type}}",
                                type=f.__class__.__name__,
                                cause=f)
                    return "null"
        elif isinstance(value, list):
            if not value:
                return "[]"

            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join(
                    [indent(pretty_json(v)) for v in value]) + "\n]"

            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"

            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(
                    *[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES
                num_columns = max(
                    1,
                    min(
                        ARRAY_MAX_COLUMNS,
                        int(
                            floor((ARRAY_ROW_LENGTH + 2.0) /
                                  float(max_len +
                                        2)))))  # +2 TO COMPENSATE FOR COMMAS
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + PRETTY_COMMA.join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join(
                        [indent(pretty_json(v)) for v in value]) + "\n]"

                content = ",\n".join(
                    PRETTY_COMMA.join(
                        j.rjust(max_len) for j in js[r:r + num_columns])
                    for r in xrange(0, len(js), num_columns))
                return "[\n" + indent(content) + "\n]"

            pretty_list = js

            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log

                    Log.warning(
                        "problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p))
            output.append("\n]")
            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log

                Log.error("not expected", cause=e)
        elif hasattr(value, '__data__'):
            d = value.__data__()
            return pretty_json(d)
        elif hasattr(value, '__json__'):
            j = value.__json__()
            if j == None:
                return "   null   "  # TODO: FIND OUT WHAT CAUSES THIS
            return pretty_json(json_decoder(j))
        elif scrub(value) is None:
            return "null"
        elif hasattr(value, '__iter__'):
            return pretty_json(list(value))
        elif hasattr(value, '__call__'):
            return "null"
        else:
            try:
                if int(value) == value:
                    return text_type(int(value))
            except Exception:
                pass

            try:
                if float(value) == value:
                    return text_type(float(value))
            except Exception:
                pass

            return pypy_json_encode(value)

    except Exception as e:
        problem_serializing(value, e)
Пример #8
0
def pretty_json(value):
    try:
        if value is False:
            return "false"
        elif value is True:
            return "true"
        elif is_data(value):
            try:
                items = sort_using_key(value.items(), lambda r: r[0])
                values = [encode_basestring(k) + PRETTY_COLON + pretty_json(v) for k, v in items if v != None]
                if not values:
                    return "{}"
                elif len(values) == 1:
                    return "{" + values[0] + "}"
                else:
                    return "{\n" + ",\n".join(indent(v) for v in values) + "\n}"
            except Exception as e:
                from mo_logs import Log
                from mo_math import OR

                if OR(not is_text(k) for k in value.keys()):
                    Log.error(
                        "JSON must have string keys: {{keys}}:",
                        keys=[k for k in value.keys()],
                        cause=e
                    )

                Log.error(
                    "problem making dict pretty: keys={{keys}}:",
                    keys=[k for k in value.keys()],
                    cause=e
                )
        elif value in (None, Null):
            return "null"
        elif value.__class__ in (binary_type, text_type):
            if is_binary(value):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception as e:
                from mo_logs import Log

                try:
                    Log.note("try explicit convert of string with length {{length}}", length=len(value))
                    acc = [QUOTE]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception:
                                c2 = c
                            c3 = text_type(c2)
                            acc.append(c3)
                        except BaseException:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.",  ord= ord(c)}, cause=g)
                    acc.append(QUOTE)
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}", length=len(output))
                    return output
                except BaseException as f:
                    Log.warning("can not convert {{type}} to json", type=f.__class__.__name__, cause=f)
                    return "null"
        elif is_list(value):
            if not value:
                return "[]"

            if ARRAY_MAX_COLUMNS == 1:
                return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"

            if len(value) == 1:
                j = pretty_json(value[0])
                if j.find("\n") >= 0:
                    return "[\n" + indent(j) + "\n]"
                else:
                    return "[" + j + "]"

            js = [pretty_json(v) for v in value]
            max_len = max(*[len(j) for j in js])
            if max_len <= ARRAY_ITEM_MAX_LENGTH and max(*[j.find("\n") for j in js]) == -1:
                # ALL TINY VALUES
                num_columns = max(1, min(ARRAY_MAX_COLUMNS, int(floor((ARRAY_ROW_LENGTH + 2.0) / float(max_len + 2)))))  # +2 TO COMPENSATE FOR COMMAS
                if len(js) <= num_columns:  # DO NOT ADD \n IF ONLY ONE ROW
                    return "[" + PRETTY_COMMA.join(js) + "]"
                if num_columns == 1:  # DO NOT rjust IF THERE IS ONLY ONE COLUMN
                    return "[\n" + ",\n".join([indent(pretty_json(v)) for v in value]) + "\n]"

                content = ",\n".join(
                    PRETTY_COMMA.join(j.rjust(max_len) for j in js[r:r + num_columns])
                    for r in xrange(0, len(js), num_columns)
                )
                return "[\n" + indent(content) + "\n]"

            pretty_list = js

            output = ["[\n"]
            for i, p in enumerate(pretty_list):
                try:
                    if i > 0:
                        output.append(",\n")
                    output.append(indent(p))
                except Exception:
                    from mo_logs import Log

                    Log.warning("problem concatenating string of length {{len1}} and {{len2}}",
                        len1=len("".join(output)),
                        len2=len(p)
                    )
            output.append("\n]")
            try:
                return "".join(output)
            except Exception as e:
                from mo_logs import Log

                Log.error("not expected", cause=e)
        elif hasattr(value, '__data__'):
            d = value.__data__()
            return pretty_json(d)
        elif hasattr(value, '__json__'):
            j = value.__json__()
            if j == None:
                return "   null   "  # TODO: FIND OUT WHAT CAUSES THIS
            return pretty_json(json_decoder(j))
        elif scrub(value) is None:
            return "null"
        elif hasattr(value, '__iter__'):
            return pretty_json(list(value))
        elif hasattr(value, '__call__'):
            return "null"
        else:
            try:
                if int(value) == value:
                    return text_type(int(value))
            except Exception:
                pass

            try:
                if float(value) == value:
                    return text_type(float(value))
            except Exception:
                pass

            return pypy_json_encode(value)

    except Exception as e:
        problem_serializing(value, e)