Exemplo n.º 1
0
    def __div__(self, amount):
        """
        DIVIDE THIS DURATION BY amount

        * amount IS A Duration WITH A NON-ZERO month: RETURN A NUMBER, THE WHOLE
          COUNT OF amount.month IN THIS DURATION, PLUS A FRACTIONAL PART THAT IS
          PASSED THROUGH MIN(29 / 30, ...)
        * amount IS A NUMBER: RETURN A NEW Duration WITH milli AND month DIVIDED
        * ANYTHING ELSE: RETURN THE RATIO self.milli / amount.milli
        """
        month_based = isinstance(amount, Duration) and amount.month
        if not month_based:
            if Math.is_number(amount):
                scaled = Duration(0)
                scaled.milli = self.milli / amount
                scaled.month = self.month / amount
                return scaled
            return self.milli / amount.milli

        months = self.month
        remainder = self.milli

        # DO NOT CONSIDER TIME OF DAY
        time_of_day = remainder % MILLI_VALUES.day
        remainder = remainder - time_of_day

        if months == 0 and remainder > (MILLI_VALUES.year / 3):
            # NO MONTHS RECORDED, BUT THE MILLISECONDS SPAN MORE THAN A THIRD OF A YEAR
            months = Math.floor(12 * self.milli / MILLI_VALUES.year)
            remainder -= (months / 12) * MILLI_VALUES.year
        else:
            remainder = remainder - (self.month * MILLI_VALUES.month)
            if remainder >= MILLI_VALUES.day * 31:
                from pyLibrary.debugs.logs import Log
                Log.error("Do not know how to handle")
        remainder = MIN(29 / 30, (remainder + time_of_day) / (MILLI_VALUES.day * 30))

        return Math.floor(months / amount.month) + remainder
Exemplo n.º 2
0
    def __new__(cls, value=None, **kwargs):
        """
        BUILD A Duration FROM value, OR FROM timedelta-STYLE KEYWORD ARGUMENTS

        RETURNS None WHEN THERE IS NO value AND NO kwargs, AND WHEN value IS A
        float NaN THAT REACHES THAT CHECK.  STRINGS ARE HANDED TO parse()
        """
        instance = object.__new__(cls)

        if value == None:
            if not kwargs:
                return None
            instance.milli = datetime.timedelta(**kwargs).total_seconds() * 1000
            instance.month = 0
            return instance

        if Math.is_number(value):
            # THE NUMBER IS MULTIPLIED BY 1000 AND WRITTEN STRAIGHT TO _milli
            instance._milli = float(value) * 1000
            instance.month = 0
            return instance
        if isinstance(value, basestring):
            return parse(value)
        if isinstance(value, Duration):
            # COPY
            instance.milli = value.milli
            instance.month = value.month
            return instance
        if isinstance(value, float) and Math.is_nan(value):
            return None

        from pyLibrary import convert
        from pyLibrary.debugs.logs import Log
        Log.error("Do not know type of object (" + convert.value2json(value) + ")of to make a Duration")
Exemplo n.º 3
0
    def __init__(self, **desc):
        """
        DEFINE A "range" DOMAIN, EITHER FROM EXPLICIT partitions OR FROM
        min, max AND interval

        :param desc: DOMAIN DESCRIPTION, PASSED STRAIGHT TO Domain.__init__

        WITH partitions: A key IS REQUIRED, EVERY PART MUST CARRY THAT KEY,
        dataIndex IS (RE)ASSIGNED IN ORDER, AND DISTINCT PARTS MUST NOT OVERLAP.
        WITHOUT partitions: THE PARTS ARE GENERATED FROM min, max AND interval
        """
        Domain.__init__(self, **desc)
        self.type = "range"
        self.NULL = Null

        if self.partitions:
            # IGNORE THE min, max, interval
            if not self.key:
                Log.error("Must have a key value")

            parts = listwrap(self.partitions)
            for i, p in enumerate(parts):
                # WIDEN THE DOMAIN TO COVER EVERY PART
                self.min = Math.min(self.min, p.min)
                self.max = Math.max(self.max, p.max)
                if p.dataIndex != None and p.dataIndex != i:
                    Log.error("Expecting `dataIndex` to agree with the order of the parts")
                if p[self.key] == None:
                    Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
                p.dataIndex = i

            # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
            # A PART IS NEVER COMPARED TO ITSELF: EVERY NON-EMPTY PART CONTAINS
            # ITS OWN min, WHICH WOULD ALWAYS BE REPORTED AS AN OVERLAP
            for i, p in enumerate(parts):
                for j, q in enumerate(parts):
                    if i != j and p.min <= q.min and q.min < p.max:
                        Log.error("partitions overlap!")

            self.partitions = parts
            return
        elif any([self.min == None, self.max == None, self.interval == None]):
            Log.error("Can not handle missing parameter")

        self.key = "min"
        self.partitions = wrap([{"min": v, "max": v + self.interval, "dataIndex": i} for i, v in enumerate(frange(self.min, self.max, self.interval))])
def geo_mean(values):
    """
    GIVEN AN ARRAY OF dicts, RETURN A dict WITH THE GEO-MEAN OF EACH ATTRIBUTE
    ZERO VALUES ARE SKIPPED; THE LOG IS TAKEN OF THE ABSOLUTE VALUE
    """
    log_moments = Struct()
    for row in values:
        for name, value in row.items():
            if value != 0:
                so_far = nvl(log_moments[name], ZeroMoment.new_instance())
                log_moments[name] = so_far + Math.log(Math.abs(value))

    return {name: Math.exp(moment.stats.mean) for name, moment in log_moments.items()}
Exemplo n.º 5
0
    def test_floor_mod_identity_w_ints(self):
        """
        FOR 100 RANDOM (x, m) PAIRS: floor(x, m) + mod(x, m) MUST REBUILD x,
        AND A ZERO MODULUS MUST GIVE None FOR BOTH FUNCTIONS
        """
        for _ in range(100):
            x = Random.float()*200 - 100.0
            m = floor(abs(random.gauss(0, 5)))

            if m != 0:
                rebuilt = Math.floor(x, m)+Math.mod(x, m)
                self.assertAlmostEqual(rebuilt, x, places=7)
                continue

            self.assertEqual(Math.floor(x, m), None)
            self.assertEqual(Math.mod(x, m), None)
Exemplo n.º 6
0
def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Snagged from unittest/case.py, then modified (Aug2014)

    Return silently when `test` matches `expected`; raise AssertionError
    otherwise.

    :param test: the value being checked
    :param expected: the reference value; `None` means there is nothing to check
    :param digits: accept when log10(abs(test - expected)) < digits
    :param places: accept when log10(abs(test - expected)) is less than
        ceiling(log10(abs(test))) - places; 15 is used when neither `digits`
        nor `delta` is given and `places` is None
    :param msg: optional text placed in front of the failure message
    :param delta: accept when abs(test - expected) <= delta
    :raises TypeError: more than one of `digits`, `places`, `delta` was given
    :raises AssertionError: the values do not match
    """
    if expected == None:  # None has no expectations
        return
    if test == expected:
        # shortcut
        return

    if not Math.is_number(expected):
        # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
        if isinstance(expected, list) and len(expected)==0 and test == None:
            return
        if isinstance(expected, Mapping) and not expected.keys() and test == None:
            return
        # NON-NUMERIC VALUES GET NO TOLERANCE: THEY MUST COMPARE EQUAL
        # THE MESSAGE IS FILLED FROM locals(), SO THE LOCAL NAMES ARE PART OF THE TEMPLATE CONTRACT
        if test != expected:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))
        return

    # COUNT HOW MANY TOLERANCE STYLES WERE REQUESTED; AT MOST ONE IS ALLOWED
    num_param = 0
    if digits != None:
        num_param += 1
    if places != None:
        num_param += 1
    if delta != None:
        num_param += 1
    if num_param>1:
        raise TypeError("specify only one of digits, places or delta")

    if digits is not None:
        # NOTE(review): suppress_exception presumably swallows any error raised
        # inside the block, leaving control to fall through to the failure
        # message below - confirm against its definition
        with suppress_exception:
            diff = Math.log10(abs(test-expected))
            if diff < digits:
                return

        standardMsg = expand_template("{{test}} != {{expected}} within {{digits}} decimal places", locals())
    elif delta is not None:
        if abs(test - expected) <= delta:
            return

        standardMsg = expand_template("{{test}} != {{expected}} within {{delta}} delta", locals())
    else:
        # NEITHER digits NOR delta: COMPARE RELATIVE TO THE MAGNITUDE OF test
        if places is None:
            places = 15

        with suppress_exception:
            diff = Math.log10(abs(test-expected))
            if diff < Math.ceiling(Math.log10(abs(test)))-places:
                return


        standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{places}} places", locals())

    # EVERY ACCEPTING PATH HAS RETURNED BY NOW
    raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")
Exemplo n.º 7
0
def intervals(_min, _max=None, size=1):
    """
    GENERATE (start, stop) PAIRS, EACH size WIDE, COVERING THE _min, _max RANGE
    THE FINAL PAIR IS CLIPPED TO _max, SO IT MAY BE NARROWER
    WITH ONE ARGUMENT THE RANGE STARTS AT ZERO, JUST LIKE range()
    """
    if _max == None:
        _min, _max = 0, _min
    upper = int(Math.ceiling(_max))
    lower = int(Math.floor(_min))

    return ((start, min(start + size, upper)) for start in __builtin__.range(lower, upper, size))
Exemplo n.º 8
0
def intervals(_min, _max=None, size=1):
    """
    GENERATE (start, stop) PAIRS, EACH size WIDE, COVERING THE _min, _max RANGE
    THE FINAL PAIR IS CLIPPED TO _max, SO IT MAY BE NARROWER
    WITH ONE ARGUMENT THE RANGE STARTS AT ZERO, JUST LIKE range()
    """
    if _max == None:
        _min, _max = 0, _min
    ceiling_ = int(Math.ceiling(_max))
    floor_ = int(Math.floor(_min))

    pairs = ((begin, min(begin + size, ceiling_)) for begin in __builtin__.range(floor_, ceiling_, size))
    return pairs
Exemplo n.º 9
0
def ighmm_log_gamma_sum(log_a, s, parent):
    """
    Compute log(sum_j exp(log_a[j] + gamma_a[k_j])) without leaving log space,
    where k_j is the position of state j in parent.gamma_id.

    The largest term is factored out and the rest are added as
    exp(term - largest).  The value 1.0 marks "no entry" in logP.

    :param log_a: log weights, indexed by state
    :param s: object whose `in_a` length gives the number of states to scan
    :param parent: object with `gamma_states` (count), `gamma_id` (state ids)
        and `gamma_a` (log values), indexed in parallel
    :return: the log of the sum
    """
    # shortcut for the trivial case
    if parent.gamma_states == 1:
        return parent.gamma_a[0] + log_a[parent.gamma_id[0]]

    n_in = len(s.in_a)
    logP = ARRAY_MALLOC(n_in)
    largest = 1.0
    argmax = 0

    # calculate logs of a[k,l]*gamma[k,hi] as sums of logs and find maximum:
    for j in range(n_in):
        # search for state j in the gamma list; k stays None when it is absent
        # (a Python loop variable never reaches gamma_states, so the C-style
        # `k == gamma_states` test could not detect a miss)
        k = None
        for idx in range(parent.gamma_states):
            if parent.gamma_id[idx] == j:
                k = idx
                break

        if k is None:
            logP[j] = 1.0
        else:
            logP[j] = log_a[j] + parent.gamma_a[k]
            if largest == 1.0 or (logP[j] > largest and logP[j] != 1.0):
                largest = logP[j]
                argmax = j

    # calculate max+Math.log(1+sum[j!=argmax exp(logP[j]-max)])
    result = 1.0
    for j in range(n_in):
        if j != argmax and logP[j] != 1.0:
            result += exp(logP[j] - largest)

    result = Math.log(result)
    result += largest
    return result
Exemplo n.º 10
0
    def pop(self, wait=SECOND, till=None):
        """
        READ ONE MESSAGE FROM THE QUEUE, REMEMBER IT IN self.pending, AND
        RETURN ITS BODY CONVERTED FROM JSON
        RETURN None WHEN NOTHING WAS READ
        """
        wait_seconds = Math.floor(wait.seconds)
        message = self.queue.read(wait_time_seconds=wait_seconds)
        if message:
            self.pending.append(message)
            return convert.json2value(message.get_body())
        return None
Exemplo n.º 11
0
    def pdf(self, data):
        """
        For every row of `data`, sum the logs of one Gaussian-shaped term per
        dimension and return the sums as a numpy array.

        Dimension 0 is centred on self.mean[0]; every other dimension j is
        centred on self.mean[j] plus self.w[j] times the deviation of its
        parent dimension (self.parents[j]) from that parent's mean.
        """

        # XXX assume root as first index
        assert self.parents[0] == -1
        assert self.w[0] == 0.0

        n_rows = len(data)
        res = np.zeros(n_rows)

        for i in range(n_rows):
            root_dev = data[i, 0] - self.mean[0]
            root_scale = 1.0 / (math.sqrt(2.0 * math.pi) * self.variance[0])
            res[i] = Math.log(root_scale * math.exp(root_dev ** 2 / (-2.0 * self.variance[0] ** 2)))

            for j in range(1, self.dimension):
                pind = self.parents[j]
                dev = data[i, j] - self.mean[j] - self.w[j] * (data[i, pind] - self.mean[pind])
                scale = 1.0 / (math.sqrt(2.0 * math.pi) * self.variance[j])
                res[i] += Math.log(scale * math.exp(dev ** 2 / (-2.0 * self.variance[j] ** 2)))

        return res
Exemplo n.º 12
0
 def convert(self, expr):
     """
     ADD THE ".$value" SUFFIX TO ALL VARIABLES

     BOOLEANS, NULLS, NUMBERS, "." AND Date INSTANCES PASS THROUGH UNCHANGED;
     QUERIES GO TO _convert_query; CONTAINERS ARE CONVERTED ELEMENT BY ELEMENT
     """
     if expr is True or expr == None or expr is False:
         return expr
     if Math.is_number(expr):
         return expr
     if expr == ".":
         return "."
     if is_keyword(expr):
         #TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX
         return expr + ".$value"
     if isinstance(expr, basestring):
         # ANY OTHER STRING IS NOT A VARIABLE
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
         return None
     if isinstance(expr, Date):
         return expr
     if isinstance(expr, Query):
         return self._convert_query(expr)
     if isinstance(expr, Mapping):
         if expr["from"]:
             return self._convert_query(expr)
         if len(expr) >= 2:
             #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
             return wrap({name: self.convert(value) for name, value in expr.items()})
         # ASSUME SINGLE-CLAUSE EXPRESSION
         k, v = expr.items()[0]
         handler = self.converter_map.get(k, self._convert_bop)
         return handler(k, v)
     if isinstance(expr, (list, set, tuple)):
         return wrap([self.convert(value) for value in expr])
     return None
Exemplo n.º 13
0
 def sample(self, native=False):
     """
     Draw one sample from the normal distribution with mean self.mean and
     variance self.variance.

     :param native: when true, delegate to random.normalvariate(); otherwise
         use a Box-Muller transform over two random_mt.float23() draws
     :return: the sampled value
     """
     sigma = math.sqrt(self.variance)
     if native:
         # normalvariate() expects the standard deviation, not the variance
         return random.normalvariate(self.mean, sigma)

     r2 = -2.0 * Math.log(random_mt.float23())   # r2 ~ chi-square(2)
     theta = 2.0 * math.pi * random_mt.float23()  # theta ~ uniform(0, 2 \pi)
     return sigma * math.sqrt(r2) * math.cos(theta) + self.mean
Exemplo n.º 14
0
def wrap_gsl_dirichlet_lnpdf(alpha, x):
    """
    Log-density of the Dirichlet distribution with parameters `alpha` at `x`.

    :param alpha: sequence of concentration parameters
    :param x: one point (sequence of numbers), or a sequence of points, in
        which case a list with one log-density per point is returned
    :return: log Gamma(sum(alpha)) - sum(log Gamma(alpha_i))
        + sum((alpha_i - 1) * log(x_i))

    Everything is evaluated in log space (gammaln / xlogy) so that large
    alpha values do not overflow gamma() and small x ** (alpha - 1) terms do
    not underflow to zero before the log is taken.
    """
    if hasattr(x[0], "__iter__"):
        return [wrap_gsl_dirichlet_lnpdf(alpha, xi) for xi in x]

    alpha = np.asarray(alpha, dtype=float)
    point = np.asarray(x, dtype=float)
    # only the leading coordinates present in both sequences enter the power term
    paired = min(len(alpha), len(point))

    log_norm = special.gammaln(np.sum(alpha)) - np.sum(special.gammaln(alpha))
    # xlogy(a, b) is a * log(b), defined as 0 when a == 0 (matches x ** 0 == 1)
    log_kernel = np.sum(special.xlogy(alpha[:paired] - 1.0, point[:paired]))
    return log_norm + log_kernel
Exemplo n.º 15
0
def parse(*args):
    """
    CONVERT THE ARGUMENTS TO A Date

    ONE ARGUMENT: A datetime/date, A Date, A NUMBER, A STRING OF 9, 10, 12 OR
    13 CHARACTERS THAT IS AN INTEGER, OR ANY OTHER STRING
    MANY ARGUMENTS: A STRING FOLLOWED BY MORE ARGUMENTS FOR unicode2Date, OR
    THE ARGUMENTS OF datetime()
    ANY FAILURE IS REPORTED THROUGH Log.error
    """
    try:
        if len(args) != 1:
            if isinstance(args[0], basestring):
                return unicode2Date(*args)
            return unix2Date(datetime2unix(datetime(*args)))

        a0 = args[0]
        if isinstance(a0, (datetime, date)):
            return unix2Date(datetime2unix(a0))
        if isinstance(a0, Date):
            return unix2Date(a0.unix)

        is_timestamp = isinstance(a0, (int, long, float, Decimal)) or (
            isinstance(a0, basestring) and len(a0) in [9, 10, 12, 13] and Math.is_integer(a0)
        )
        if is_timestamp:
            a0 = float(a0)
            # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
            return unix2Date(a0 / 1000 if a0 > 9999999999 else a0)

        if isinstance(a0, basestring):
            return unicode2Date(a0)
        return unix2Date(datetime2unix(datetime(*args)))
    except Exception as e:
        from pyLibrary.debugs.logs import Log

        Log.error("Can not convert {{args}} to Date", args=args, cause=e)
Exemplo n.º 16
0
 def end(self):
     """
     RETURN Stats BUILT FROM THE SORTED SAMPLES WITH AN EQUAL NUMBER DROPPED
     FROM EACH END; THE NUMBER DROPPED DEPENDS ON self.middle
     AN EMPTY Stats IS RETURNED WHEN NOTHING WOULD BE LEFT
     THE RETURNED OBJECT CARRIES A COPY OF ALL THE ORIGINAL SAMPLES
     """
     count = len(self.samples)
     ignore = Math.ceiling(count * (1 - self.middle) / 2)
     if ignore * 2 >= count:
         return stats.Stats()

     kept = sorted(self.samples)[ignore:count - ignore]
     output = stats.Stats(samples=kept)
     output.samples = list(self.samples)
     return output
Exemplo n.º 17
0
 def __init__(self, edge, query, limit):
     """
     SET UP THE DECODER FOR edge, AND CAP THE DOMAIN'S limit AT MAX_LIMIT
     THE CAP IS APPLIED TO THE FIRST OF: THE DOMAIN'S limit, THE QUERY'S limit, 10
     """
     AggsDecoder.__init__(self, edge, query, limit)
     self.domain = edge.domain
     requested = coalesce(self.domain.limit, query.limit, 10)
     self.domain.limit = Math.min(requested, MAX_LIMIT)
     self.parts = []
     self.key2index = dict()
     self.computed_domain = False
Exemplo n.º 18
0
 def quote_value(self, value):
     """
     convert values to mysql code for the same
     mostly delegate directly to the mysql lib, but some exceptions exist:
     nulls, SQL objects, datetimes, iterables/mappings (sent as JSON) and numbers
     """
     try:
         if value == None:
             return "NULL"
         if isinstance(value, SQL):
             if value.param:
                 param = {k: self.quote_sql(v) for k, v in value.param.items()}
                 return expand_template(value.template, param)
             # value.template CAN BE MORE THAN A TEMPLATE STRING
             return self.quote_sql(value.template)
         if isinstance(value, basestring):
             return self.db.literal(value)
         if isinstance(value, datetime):
             stamp = value.strftime("%Y%m%d%H%M%S")
             return "str_to_date('" + stamp + "', '%Y%m%d%H%i%s')"
         if hasattr(value, '__iter__') or isinstance(value, Mapping):
             return self.db.literal(json_encode(value))
         if Math.is_number(value):
             return unicode(value)
         return self.db.literal(value)
     except Exception as e:
         Log.error("problem quoting SQL", e)
Exemplo n.º 19
0
 def convert(self, expr):
     """
     EXPAND INSTANCES OF name TO value

     KEYWORDS ARE LOOKED UP IN self.dimensions (KEPT AS-IS WHEN NOT FOUND);
     QUERIES GO TO _convert_query; CONTAINERS ARE CONVERTED ELEMENT BY ELEMENT;
     ANYTHING UNRECOGNIZED IS RETURNED UNCHANGED
     """
     if expr is True or expr == None or expr is False:
         return expr
     if Math.is_number(expr):
         return expr
     if expr == ".":
         return "."
     if is_keyword(expr):
         return coalesce(self.dimensions[expr], expr)
     if isinstance(expr, basestring):
         # ANY OTHER STRING IS NOT A VARIABLE
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
         return None
     if isinstance(expr, Date):
         return expr
     if isinstance(expr, Query):
         return self._convert_query(expr)
     if isinstance(expr, Mapping):
         if expr["from"]:
             return self._convert_query(expr)
         if len(expr) >= 2:
             #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
             return wrap({name: self.convert(value) for name, value in expr.leaves()})
         # ASSUME SINGLE-CLAUSE EXPRESSION
         k, v = expr.items()[0]
         handler = converter_map.get(k, self._convert_bop)
         return handler(self, k, v)
     if isinstance(expr, (list, set, tuple)):
         return wrap([self.convert(value) for value in expr])
     return expr
Exemplo n.º 20
0
def icompressed2ibytes(source):
    """
    :param source: GENERATOR OF COMPRESSED BYTES
    :return: GENERATOR OF BYTES

    WHEN DEBUG IS SET, A NOTE IS LOGGED EACH TIME THE RUNNING TOTAL MOVES TO A
    DIFFERENT MULTIPLE OF 1000000
    """
    inflater = zlib.decompressobj(16 + zlib.MAX_WBITS)
    reported_total = 0  # Total at the last report, so we do not show too many debug lines
    running_total = 0
    for chunk in source:
        plain = inflater.decompress(chunk)
        running_total += len(plain)
        moved_on = Math.floor(reported_total, 1000000) != Math.floor(running_total, 1000000)
        if moved_on:
            reported_total = running_total
            if DEBUG:
                Log.note("bytes={{bytes}}", bytes=running_total)
        yield plain
Exemplo n.º 21
0
        def int2Partition(value):
            """
            MAP value TO A PARTITION OF edge.domain; A value THAT ROUNDS TO ZERO
            MAPS TO THE DOMAIN'S NULL PART
            THE FIRST FOUR CHARACTERS OF str(value) AND ITS LAST TWO ARE PASSED
            TO datetime() (PRESUMABLY YEAR AND MONTH - TODO CONFIRM), THEN
            SHIFTED BY offset
            """
            if Math.round(value) == 0:
                return edge.domain.NULL

            text = str(value)
            d = datetime(text[:4], text[-2:], 1)
            shifted = d.addMilli(offset)
            return edge.domain.getPartByKey(shifted)
Exemplo n.º 22
0
 def test_mod(self):
     """
     Math.mod(value, 12) FOR POSITIVE, ZERO AND NEGATIVE value; THE EXPECTED
     RESULT IS NEVER NEGATIVE
     """
     cases = [
         (12, 0),
         (11, 11),
         (2, 2),
         (1, 1),
         (-0, 0),
         (-1, 11),
         (-2, 10),
         (-12, 0),
     ]
     for value, expected in cases:
         self.assertEqual(Math.mod(value, 12), expected)
Exemplo n.º 23
0
def get_all_vars(expr):
    """
    RETURN THE SET OF VARIABLE NAMES REFERENCED BY expr

    :param expr: None, A BOOLEAN, A NUMBER, A unicode PATH ("." OR A KEYWORD),
        OR A MAPPING WHOSE FIRST ITEM IS {operator: term}
    :return: set OF VARIABLE NAMES (EMPTY FOR None, BOOLEANS AND NUMBERS)

    UNKNOWN OPERATORS AND MALFORMED TERMS ARE REPORTED THROUGH Log.error
    """
    if expr == None:
        return set()
    elif isinstance(expr, unicode):
        if expr == "." or is_keyword(expr):
            return set([expr])
        else:
            Log.error("Expecting a json path")
    elif expr is True:
        return set()
    elif expr is False:
        return set()
    elif Math.is_number(expr):
        return set()

    op, term = expr.items()[0]

    mop = ruby_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            return get_all_vars(a) | get_all_vars(b)
        else:
            # A SINGLE TERM: ITS VARIABLES ARE THE ANSWER (THE RESULT USED TO BE DISCARDED)
            return get_all_vars(term)

    bop = ruby_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                output = set()
                for a, b in term.items():
                    output |= get_all_vars(a)  # {k:v} k IS VARIABLE, v IS A VALUE
                return output
            else:
                # ONLY THE FIRST KEY IS TREATED AS A VARIABLE
                a, b = term.items()[0]
                return get_all_vars(a)
        else:
            Log.error("Expecting binary term")

    uop = ruby_unary_operators.get(op)
    if uop:
        return get_all_vars(term)

    cop = complex_operators.get(op)
    if cop:
        return cop(op, term).vars()

    Log.error("`{{op}}` is not a recognized operation", op=op)
Exemplo n.º 24
0
    def wrap(query, schema=None):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON

        :param query: A QueryOp OR None (BOTH RETURNED UNCHANGED), OTHERWISE A
            QUERY DESCRIPTION WITH from, select, edges, groupby, where, window,
            sort, limit, format AND isLean PROPERTIES
        :param schema: OPTIONAL SCHEMA HANDED TO THE _normalize_* HELPERS; WHEN
            MISSING, THE WRAPPED from IS USED IF IT IS A Schema
        :return: A QueryOp WITH EVERY CLAUSE NORMALIZED
        """
        if isinstance(query, QueryOp) or query == None:
            return query

        # NOTE(review): a bare name inside a function body is not looked up in
        # an enclosing class body, so this `wrap` is presumably the
        # module-level wrap() helper rather than this function - confirm
        query = wrap(query)

        output = QueryOp("from", None)
        output.format = query.format
        output.frum = wrap_from(query["from"], schema=schema)
        if not schema and isinstance(output.frum, Schema):
            schema = output.frum

        if query.select:
            # NOTE(review): this branch reads query.frum while the default
            # branch below reads query["from"] - confirm both are intended
            output.select = _normalize_selects(query.select, query.frum, schema=schema)
        else:
            if query.edges or query.groupby:
                # AGGREGATING WITHOUT AN EXPLICIT select: DEFAULT TO A COUNT
                output.select = Dict(name="count", value=jx_expression("."), aggregate="count", default=0)
            else:
                output.select = _normalize_selects(".", query["from"])

        # edges AND groupby ARE MUTUALLY EXCLUSIVE; THE UNUSED ONE IS SET TO Null
        if query.groupby and query.edges:
            Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
        elif query.edges:
            output.edges = _normalize_edges(query.edges, schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby, schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where(query.where, schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.having = None
        output.sort = _normalize_sort(query.sort)
        # THE LIMIT DEFAULTS TO DEFAULT_LIMIT AND IS PASSED THROUGH Math.min WITH MAX_LIMIT
        output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
        if not Math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        return output
Exemplo n.º 25
0
def get_all_vars(expr):
    """
    RETURN THE SET OF VARIABLE NAMES REFERENCED BY expr

    :param expr: None, A BOOLEAN, A NUMBER, A unicode PATH ("." OR A KEYWORD),
        OR A MAPPING WHOSE FIRST ITEM IS {operator: term}
    :return: set OF VARIABLE NAMES (EMPTY FOR None, BOOLEANS AND NUMBERS)

    UNKNOWN OPERATORS AND MALFORMED TERMS ARE REPORTED THROUGH Log.error
    """
    if expr == None:
        return set()
    elif isinstance(expr, unicode):
        if expr == "." or is_keyword(expr):
            return set([expr])
        else:
            Log.error("Expecting a json path")
    elif expr is True:
        return set()
    elif expr is False:
        return set()
    elif Math.is_number(expr):
        return set()

    op, term = expr.items()[0]

    mop = ruby_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            return get_all_vars(a) | get_all_vars(b)
        else:
            # A SINGLE TERM: ITS VARIABLES ARE THE ANSWER (THE RESULT USED TO BE DISCARDED)
            return get_all_vars(term)

    bop = ruby_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                output = set()
                for a, b in term.items():
                    output |= get_all_vars(a)  # {k:v} k IS VARIABLE, v IS A VALUE
                return output
            else:
                # ONLY THE FIRST KEY IS TREATED AS A VARIABLE
                a, b = term.items()[0]
                return get_all_vars(a)
        else:
            Log.error("Expecting binary term")

    uop = ruby_unary_operators.get(op)
    if uop:
        return get_all_vars(term)

    cop = complex_operators.get(op)
    if cop:
        return cop(op, term).vars()

    Log.error("`{{op}}` is not a recognized operation",  op= op)
Exemplo n.º 26
0
def value2query(value):
    """
    CONVERT value TO THE FORM USED IN A QUERY: datetimes GO THROUGH
    convert.datetime2milli, Durations GIVE THEIR milli, NUMBERS PASS THROUGH,
    EVERYTHING ELSE IS QUOTED AS A STRING
    """
    if isinstance(value, datetime):
        return convert.datetime2milli(value)
    elif isinstance(value, Duration):
        return value.milli
    elif Math.is_number(value):
        return value
    else:
        return convert.string2quote(value)
Exemplo n.º 27
0
def Viterbi_precompute(mo, o, len, v):
    """
    Fill v.log_in_a[j][i] with the log of mo.s[j].in_a[i] for all pairs of the
    mo.N states, and v.log_b[j][t] with the log of mo.s[j].b[t] for each state
    and each of the mo.M symbols.

    Entries that are exactly 0.0 get the marker value +1 instead of a log.
    The `o` and `len` arguments are not read here.
    """
    states = range(mo.N)

    # Precomputing the Math.log(a_ij)
    for j in states:
        for i in states:
            a_ji = mo.s[j].in_a[i]
            # DBL_EPSILON ?
            # +1 is not used any further in the calculations
            v.log_in_a[j][i] = +1 if a_ji == 0.0 else Math.log(a_ji)

    # Precomputing the Math.log(bj(ot))
    for j in states:
        for t in range(mo.M):
            b_jt = mo.s[j].b[t]
            # DBL_EPSILON ?
            v.log_b[j][t] = +1 if b_jt == 0.0 else Math.log(b_jt)
Exemplo n.º 28
0
    def floor(self, interval=None):
        """
        RETURN A NEW Duration: THIS ONE ROUNDED DOWN TO A WHOLE MULTIPLE OF interval

        interval MUST BE A Duration.  WHEN IT HAS MONTHS THE RESULT IS A WHOLE
        NUMBER OF MONTHS, OTHERWISE A WHOLE MULTIPLE OF interval.milli
        """
        if not isinstance(interval, Duration):
            from pyLibrary.debugs.logs import Log
            Log.error("Expecting an interval as a Duration object")

        output = Duration(0)
        if not interval.month:
            step = interval.milli
            output.milli = Math.floor(self.milli / step) * step
            return output

        if self.month:
            multiples = Math.floor(self.month / interval.month)
        else:
            # A MONTH OF DURATION IS BIGGER THAN A CANONICAL MONTH
            multiples = Math.floor(self.milli * 12 / MILLI_VALUES["year"] / interval.month)

        output.month = int(multiples * interval.month)
        output.milli = output.month * MILLI_VALUES.month
        return output
Exemplo n.º 29
0
def kbest_buildLogMatrix(s, N):
    """
    Return an N x N structure (rows allocated with ARRAY_MALLOC) whose entry
    [i][j] is the log of s[i].in_a[j].
    """
    # create & initialize matrix:
    log_a = ARRAY_MALLOC(N)
    for i in range(N):
        log_a[i] = ARRAY_MALLOC(N)
        state = s[i]
        for j in range(N):
            log_a[i][j] = Math.log(state.in_a[j])

    return log_a
Exemplo n.º 30
0
    def find_keys(self, start, count, filter=None):
        """
        RETURN THE SET OF BUCKET KEYS k WITH Version(start) <= Version(k) < Version(start + count)

        THE BUCKET IS LISTED BY PREFIX: start AS TEXT, WITH ITS TRAILING DIGITS
        REMOVED (HOW MANY DEPENDS ON count).  filter IS NOT USED
        """
        digits = int(Math.ceiling(log10(count - 1)))
        prefix = unicode(start)[:-digits]

        metas = self.bucket.metas(prefix=prefix)
        lower = Version(unicode(start))
        upper = Version(unicode(start+count))

        found = set()
        for meta in metas:
            if lower <= Version(meta.key) < upper:
                found.add(meta.key)
        return found
Exemplo n.º 31
0
def sviterbi_precompute(smo, O, T, v):
    """
    Fill v.log_b[j][t] with the log of smo.s[j].calc_b(O[t]) for each of the
    first T observations and each of the smo.N states.

    A value of exactly 0.0 is stored as -DBL_MAX instead of taking its log.
    """
    # Precomputing of Math.log(b_j(O_t))
    for t in range(T):
        for j in range(smo.N):
            cb = smo.s[j].calc_b(O[t])
            # DBL_EPSILON ?
            v.log_b[j][t] = -DBL_MAX if cb == 0.0 else Math.log(cb)
Exemplo n.º 32
0
    def pop_message(self, wait=SECOND, till=None):
        """
        READ ONE MESSAGE FROM THE QUEUE AND RETURN ITS BODY CONVERTED FROM JSON
        RETURN None WHEN NOTHING WAS READ
        THE CALLER IS RESPONSIBLE FOR CALLING delete_message() WHEN DONE
        """
        wait_seconds = Math.floor(wait.seconds)
        message = self.queue.read(wait_time_seconds=wait_seconds)
        if message:
            return convert.json2value(message.get_body())
        return None