def __div__(self, amount):
    """
    Divide this Duration by `amount`.

    * Duration / Duration-with-months -> float number of (possibly fractional) months
    * Duration / number               -> scaled Duration
    * Duration / month-less Duration  -> float ratio of millis

    FIX: the fractional constants (29/30, m/12, (r+tod)/(day*30)) are made
    explicit floats; under Python 2 classic division (no
    `from __future__ import division`) the original integer forms truncate
    to 0.  Under true division this change is a no-op.
    """
    if isinstance(amount, Duration) and amount.month:
        m = self.month
        r = self.milli

        # DO NOT CONSIDER TIME OF DAY
        tod = r % MILLI_VALUES.day
        r = r - tod

        if m == 0 and r > (MILLI_VALUES.year / 3):
            # no explicit months, but more than ~4 months of millis:
            # re-express the bulk as whole months
            m = Math.floor(12 * self.milli / MILLI_VALUES.year)
            r -= (m / 12.0) * MILLI_VALUES.year
        else:
            r = r - (self.month * MILLI_VALUES.month)
            if r >= MILLI_VALUES.day * 31:
                from pyLibrary.debugs.logs import Log
                Log.error("Do not know how to handle")
        # leftover millis as a fraction of a 30-day month, capped just below 1
        r = MIN(29.0 / 30.0, float(r + tod) / (MILLI_VALUES.day * 30))

        output = Math.floor(m / amount.month) + r
        return output
    elif Math.is_number(amount):
        output = Duration(0)
        output.milli = self.milli / amount
        output.month = self.month / amount
        return output
    else:
        return self.milli / amount.milli
def __new__(cls, value=None, **kwargs):
    """
    Construct a Duration from one of: timedelta-style keyword arguments,
    a number of seconds, a duration string, or another Duration.
    Returns None for a null value (and for float NaN).
    """
    output = object.__new__(cls)
    if value == None:
        if kwargs:
            # build from datetime.timedelta keywords (days=, hours=, ...)
            output.milli = datetime.timedelta(**kwargs).total_seconds() * 1000
            output.month = 0
            return output
        else:
            return None

    if Math.is_number(value):
        # numeric value is interpreted as SECONDS
        # NOTE(review): this branch sets `_milli` directly while every other
        # branch assigns `milli` — presumably bypassing a property setter on
        # purpose; confirm `milli` is a property backed by `_milli`
        output._milli = float(value) * 1000
        output.month = 0
        return output
    elif isinstance(value, basestring):
        # parse duration strings like "2day"
        return parse(value)
    elif isinstance(value, Duration):
        # copy constructor
        output.milli = value.milli
        output.month = value.month
        return output
    elif isinstance(value, float) and Math.is_nan(value):
        # NaN is treated as null
        return None
    else:
        from pyLibrary import convert
        from pyLibrary.debugs.logs import Log
        Log.error("Do not know type of object (" + convert.value2json(value) + ")of to make a Duration")
def __init__(self, **desc):
    """
    A range domain: either built from explicit, ordered partitions (each
    carrying `min`, `max` and the `key` property), or generated from
    min/max/interval as equal-width buckets keyed by "min".
    """
    Domain.__init__(self, **desc)
    self.type = "range"
    self.NULL = Null

    if self.partitions:
        # IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")

        parts = listwrap(self.partitions)
        for i, p in enumerate(parts):
            # widen the domain bounds to cover every partition
            self.min = Math.min(self.min, p.min)
            self.max = Math.max(self.max, p.max)
            if p.dataIndex != None and p.dataIndex != i:
                Log.error("Expecting `dataIndex` to agree with the order of the parts")
            if p[self.key] == None:
                Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
            p.dataIndex = i

        # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
        # BUG FIX: skip the self-pair — product(parts, parts) yields (p, p),
        # which satisfies p.min <= p.min < p.max and raised a spurious error
        for p, q in itertools.product(parts, parts):
            if p is not q and p.min <= q.min and q.min < p.max:
                Log.error("partitions overlap!")

        self.partitions = parts
        return
    elif any([self.min == None, self.max == None, self.interval == None]):
        Log.error("Can not handle missing parameter")

    # no explicit partitions: build equal-width buckets over [min, max)
    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(frange(self.min, self.max, self.interval))
    ])
def geo_mean(values):
    """
    GIVEN AN ARRAY OF dicts, CALC THE GEO-MEAN ON EACH ATTRIBUTE
    (zeros are excluded from the mean)
    """
    accumulators = Struct()
    for record in values:
        for name, val in record.items():
            if val == 0:
                continue  # zero would make the geometric mean degenerate
            moment = nvl(accumulators[name], ZeroMoment.new_instance())
            accumulators[name] = moment + Math.log(Math.abs(val))
    return {name: Math.exp(moment.stats.mean) for name, moment in accumulators.items()}
def test_floor_mod_identity_w_ints(self):
    """Check floor(x, m) + mod(x, m) == x for random x and integer modulo m."""
    for _ in range(100):
        x = Random.float() * 200 - 100.0
        modulo = floor(abs(random.gauss(0, 5)))
        if modulo:
            self.assertAlmostEqual(Math.floor(x, modulo) + Math.mod(x, modulo), x, places=7)
        else:
            # degenerate zero modulo: both helpers answer None
            self.assertEqual(Math.floor(x, modulo), None)
            self.assertEqual(Math.mod(x, modulo), None)
def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Snagged from unittest/case.py, then modified (Aug2014)

    Assert `test` is close to `expected`.  Exactly one of `digits`,
    `places`, or `delta` may be given; with none, 15 significant places
    is used.  Raises AssertionError on mismatch, TypeError on conflicting
    tolerance parameters.
    """
    if expected == None:  # None has no expectations
        return
    if test == expected:
        # shortcut
        return

    if not Math.is_number(expected):
        # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
        if isinstance(expected, list) and len(expected)==0 and test == None:
            return
        if isinstance(expected, Mapping) and not expected.keys() and test == None:
            return
        # non-numeric expectation: fall back to plain equality
        if test != expected:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))
        return

    # ensure at most one tolerance parameter was supplied
    num_param = 0
    if digits != None:
        num_param += 1
    if places != None:
        num_param += 1
    if delta != None:
        num_param += 1
    if num_param>1:
        raise TypeError("specify only one of digits, places or delta")

    if digits is not None:
        # compare magnitudes of the absolute difference
        # NOTE(review): `diff < digits` compares log10(|diff|) against +digits;
        # for "within N decimal places" one would expect `< -digits` — confirm
        with suppress_exception:
            diff = Math.log10(abs(test-expected))
            if diff < digits:
                return

        standardMsg = expand_template("{{test}} != {{expected}} within {{digits}} decimal places", locals())
    elif delta is not None:
        # absolute-difference tolerance
        if abs(test - expected) <= delta:
            return

        standardMsg = expand_template("{{test}} != {{expected}} within {{delta}} delta", locals())
    else:
        # significant-places tolerance, relative to the magnitude of `test`
        if places is None:
            places = 15

        with suppress_exception:
            diff = Math.log10(abs(test-expected))
            if diff < Math.ceiling(Math.log10(abs(test)))-places:
                return

        standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{places}} places", locals())

    raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")
def intervals(_min, _max=None, size=1):
    """
    RETURN (min, max) PAIRS OF GIVEN SIZE, WHICH COVER THE _min, _max RANGE
    THE LAST PAIR MAY BE SMALLER
    Yes! It's just like range(), only cooler!
    """
    # single-argument form covers [0, _min), mirroring range()
    if _max == None:
        _min, _max = 0, _min

    lo = int(Math.floor(_min))
    hi = int(Math.ceiling(_max))

    # the final pair is clipped to hi, so it may be shorter than `size`
    return ((start, min(start + size, hi)) for start in __builtin__.range(lo, hi, size))
def ighmm_log_gamma_sum(log_a, s, parent):
    """
    Log-space sum: returns max + log(1 + sum over other valid j of
    exp(logP[j] - max)), where logP[j] = log_a[j] + gamma of state j.
    The value 1.0 serves as the "invalid / not present" sentinel
    throughout (a log-probability is never +1).
    """
    best = 1.0     # running maximum of valid logP entries; 1.0 == "unset"
    argmax = 0

    # shortcut for the trivial case
    if parent.gamma_states == 1:
        return parent.gamma_a[0] + log_a[parent.gamma_id[0]]

    logP = ARRAY_MALLOC(len(s.in_a))

    # calculate logs of a[k,l]*gamma[k,hi] as sums of logs and find maximum:
    for j in range(len(s.in_a)):
        # search for state j in the gamma list
        # BUG FIX: the original tested `k == parent.gamma_states` after the
        # loop — a C idiom that never holds in Python (the loop variable ends
        # at gamma_states-1), so the not-found branch was unreachable; use
        # for/else so it actually runs
        for k in range(parent.gamma_states):
            if parent.gamma_id[k] == j:
                logP[j] = log_a[j] + parent.gamma_a[k]
                if best == 1.0 or (logP[j] > best and logP[j] != 1.0):
                    best = logP[j]
                    argmax = j
                break
        else:
            logP[j] = 1.0  # sentinel: state j is not in the gamma list

    # calculate max + log(1 + sum[j != argmax] exp(logP[j] - max))
    result = 1.0
    for j in range(len(s.in_a)):
        if j != argmax and logP[j] != 1.0:
            result += exp(logP[j] - best)
    result = Math.log(result)
    result += best
    return result
def pop(self, wait=SECOND, till=None):
    """
    Read one message from the queue, remember it as pending, and return
    its decoded JSON body.  Returns None when the queue is empty.
    """
    message = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
    if message:
        self.pending.append(message)
        return convert.json2value(message.get_body())
    return None
def pdf(self, data):
    """
    Log-density of each row of `data` under this tree-structured Gaussian
    model: the root dimension is an unconditional Gaussian, every other
    dimension j is Gaussian around mean[j] plus w[j] times its parent's
    deviation.  Returns an array of per-row log-densities.

    NOTE(review): `self.variance` is used where a standard deviation
    appears in the Gaussian density (it scales the normalization constant
    and is squared in the exponent) — presumably it stores the stddev;
    confirm against the model's fitting code.
    """
    # XXX assume root as first index
    assert self.parents[0] == -1
    assert self.w[0] == 0.0
    res = np.zeros(len(data))
    for i in range(len(data)):
        # root dimension: plain Gaussian term
        res[i] = Math.log((1.0 / (math.sqrt(2.0 * math.pi) * self.variance[0])) * math.exp((
            data[i, 0] - self.mean[0]
        ) ** 2 / (-2.0 * self.variance[0] ** 2)))
        for j in range(1, self.dimension):
            # child dimension: Gaussian around mean shifted by the
            # weighted deviation of its parent dimension
            pind = self.parents[j]
            res[i] += Math.log(
                (1.0 / (math.sqrt(2.0 * math.pi) * self.variance[j])) * math.exp((
                    data[i, j] - self.mean[j] - self.w[j] * (
                        data[i, pind] - self.mean[pind]
                    )
                ) ** 2 / (-2.0 * self.variance[j] ** 2)))
    return res
def convert(self, expr):
    """
    ADD THE ".$value" SUFFIX TO ALL VARIABLES

    Recursively rewrites an expression tree: variable names get the
    ".$value" suffix, queries and sub-expressions are converted through
    the appropriate handlers, literals pass through unchanged.
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif Math.is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_keyword(expr):
        #TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX
        return expr + ".$value"
    elif isinstance(expr, basestring):
        # a string that is not a valid json path
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif isinstance(expr, Query):
        return self._convert_query(expr)
    elif isinstance(expr, Mapping):
        if expr["from"]:
            # an embedded query
            return self._convert_query(expr)
        elif len(expr) >= 2:
            #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return wrap({name: self.convert(value) for name, value in expr.items()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return self.converter_map.get(k, self._convert_bop)(k, v)
    elif isinstance(expr, (list, set, tuple)):
        return wrap([self.convert(value) for value in expr])
    # NOTE(review): unlike the sibling converter (which ends with
    # `else: return expr`), unmatched types fall through here and
    # implicitly return None — confirm whether that is intentional
def sample(self, native=False):
    """
    Draw one sample from this Gaussian.  With native=True delegate to
    random.normalvariate; otherwise use a Box-Muller transform driven by
    the Mersenne-Twister source.

    NOTE(review): `random_mt.float23()` — presumably a 23-bit uniform
    float; confirm the name is not a typo for float53.
    """
    if native:
        return random.normalvariate(self.mean, self.variance)

    # Box-Muller: r2 ~ chi-square(2), theta ~ uniform(0, 2*pi)
    r2 = -2.0 * Math.log(random_mt.float23())
    theta = 2.0 * math.pi * random_mt.float23()
    standard_normal = math.sqrt(r2) * math.cos(theta)
    return self.mean + math.sqrt(self.variance) * standard_normal
def wrap_gsl_dirichlet_lnpdf(alpha, x):
    """
    Log-density of the Dirichlet(alpha) distribution at x.  When x is a
    sequence of vectors, return a list of one log-density per vector.
    """
    if hasattr(x[0], "__iter__"):
        # batch form: evaluate each sample vector independently
        return [wrap_gsl_dirichlet_lnpdf(alpha, sample) for sample in x]

    # log of the normalization constant: log Gamma(sum a_i) - sum log Gamma(a_i)
    log_norm = Math.log(special.gamma(sum(alpha))) - np.sum(np.log(special.gamma(alpha)))
    # log of the kernel: sum log(x_i^(a_i - 1))
    log_kernel = np.sum(np.log([xi ** (ai - 1.0) for xi, ai in zip(x, alpha)]))
    return log_norm + log_kernel
def parse(*args):
    """
    Convert the arguments to a Date.  Accepts a datetime/date, a Date,
    a unix timestamp in seconds or milliseconds (number, or digit string
    of plausible length), a date string, or datetime() constructor
    arguments (year, month, day, ...).
    """
    try:
        if len(args) == 1:
            a0 = args[0]
            if isinstance(a0, (datetime, date)):
                output = unix2Date(datetime2unix(a0))
            elif isinstance(a0, Date):
                output = unix2Date(a0.unix)
            elif isinstance(a0, (int, long, float, Decimal)):
                a0 = float(a0)
                # value in milliseconds rather than seconds?
                if a0 > 9999999999:    # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
                    output = unix2Date(a0 / 1000)
                else:
                    output = unix2Date(a0)
            elif isinstance(a0, basestring) and len(a0) in [9, 10, 12, 13] and Math.is_integer(a0):
                # digit strings of timestamp-like length are treated as
                # numeric timestamps, not formatted dates
                a0 = float(a0)
                if a0 > 9999999999:    # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
                    output = unix2Date(a0 / 1000)
                else:
                    output = unix2Date(a0)
            elif isinstance(a0, basestring):
                # formatted date string
                output = unicode2Date(a0)
            else:
                output = unix2Date(datetime2unix(datetime(*args)))
        else:
            if isinstance(args[0], basestring):
                # date string plus format string(s)
                output = unicode2Date(*args)
            else:
                # datetime constructor arguments
                output = unix2Date(datetime2unix(datetime(*args)))

        return output
    except Exception, e:
        from pyLibrary.debugs.logs import Log
        Log.error("Can not convert {{args}} to Date", args=args, cause=e)
def end(self):
    """
    Finish collecting: return Stats over the middle fraction of the
    samples (top and bottom tails trimmed symmetrically), while keeping
    the full, untrimmed sample list on the result.
    """
    total = len(self.samples)
    trim = Math.ceiling(total * (1 - self.middle) / 2)
    if 2 * trim >= total:
        # trimming would consume everything; report empty stats
        return stats.Stats()

    trimmed = sorted(self.samples)[trim:total - trim]
    summary = stats.Stats(samples=trimmed)
    summary.samples = list(self.samples)
    return summary
def __init__(self, edge, query, limit):
    """Decoder over edge.domain; clamps the domain limit and prepares part-collection state."""
    AggsDecoder.__init__(self, edge, query, limit)
    self.parts = []
    self.key2index = {}
    self.computed_domain = False
    self.domain = edge.domain
    # prefer an explicit domain limit, then the query limit, then 10 — never above MAX_LIMIT
    self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
def quote_value(self, value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist
    """
    try:
        if value == None:
            return "NULL"
        elif isinstance(value, SQL):
            if not value.param:
                # value.template CAN BE MORE THAN A TEMPLATE STRING
                return self.quote_sql(value.template)
            # quote each parameter, then expand the template with them
            param = {k: self.quote_sql(v) for k, v in value.param.items()}
            return expand_template(value.template, param)
        elif isinstance(value, basestring):
            return self.db.literal(value)
        elif isinstance(value, datetime):
            # render datetimes through MySQL's str_to_date()
            return "str_to_date('" + value.strftime("%Y%m%d%H%M%S") + "', '%Y%m%d%H%i%s')"
        elif hasattr(value, '__iter__'):
            # iterables are stored as JSON
            return self.db.literal(json_encode(value))
        elif isinstance(value, Mapping):
            # NOTE(review): dicts have __iter__ in Python 2, so the branch
            # above already catches them; both branches produce the same
            # JSON literal, so this is harmless dead code
            return self.db.literal(json_encode(value))
        elif Math.is_number(value):
            return unicode(value)
        else:
            return self.db.literal(value)
    except Exception, e:
        Log.error("problem quoting SQL", e)
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value

    Recursively rewrites an expression tree: known dimension names are
    replaced by their definitions, queries and sub-expressions are
    converted through the appropriate handlers, literals pass through.
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif Math.is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_keyword(expr):
        # substitute a known dimension, else keep the name as-is
        return coalesce(self.dimensions[expr], expr)
    elif isinstance(expr, basestring):
        # a string that is not a valid json path
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif isinstance(expr, Query):
        return self._convert_query(expr)
    elif isinstance(expr, Mapping):
        if expr["from"]:
            # an embedded query
            return self._convert_query(expr)
        elif len(expr) >= 2:
            #ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return wrap({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return converter_map.get(k, self._convert_bop)(self, k, v)
    elif isinstance(expr, (list, set, tuple)):
        return wrap([self.convert(value) for value in expr])
    else:
        # unrecognized type: pass through unchanged
        return expr
def icompressed2ibytes(source):
    """
    :param source: GENERATOR OF COMPRESSED BYTES
    :return: GENERATOR OF BYTES
    """
    inflater = zlib.decompressobj(16 + zlib.MAX_WBITS)  # 16+MAX_WBITS accepts a gzip header
    reported = 0  # byte count at the last debug report, so we do not show too many debug lines
    total = 0
    for chunk in source:
        inflated = inflater.decompress(chunk)
        total += len(inflated)
        # report at most once per million decompressed bytes
        if Math.floor(reported, 1000000) != Math.floor(total, 1000000):
            reported = total
            if DEBUG:
                Log.note("bytes={{bytes}}", bytes=total)
        yield inflated
def int2Partition(value):
    """
    Map an integer of the form YYYYMM to its domain partition; a value
    that rounds to zero maps to the null partition.
    """
    if Math.round(value) == 0:
        return edge.domain.NULL

    # BUG FIX: the original passed string slices as year/month, but a
    # datetime constructor requires integers — convert the YYYYMM digits
    # explicitly (integers remain valid if `datetime` here is a project
    # Date wrapper; confirm which it is)
    digits = str(value)
    d = datetime(int(digits[:4]), int(digits[-2:]), 1)
    d = d.addMilli(offset)
    return edge.domain.getPartByKey(d)
def test_mod(self):
    """Math.mod is a true modulo: the result always lands in [0, 12)."""
    cases = [
        (12, 0),
        (11, 11),
        (2, 2),
        (1, 1),
        (-0, 0),
        (-1, 11),
        (-2, 10),
        (-12, 0),
    ]
    for value, expected in cases:
        self.assertEqual(Math.mod(value, 12), expected)
def get_all_vars(expr):
    """
    Return the set of variable (json path) names referenced by the given
    expression.  Literals contribute nothing; operator clauses are
    searched recursively according to their operator class.
    """
    if expr == None:
        return set()
    elif isinstance(expr, unicode):
        if expr == "." or is_keyword(expr):
            return set([expr])
        else:
            Log.error("Expecting a json path")
    elif expr is True:
        return set()
    elif expr is False:
        return set()
    elif Math.is_number(expr):
        return set()

    op, term = expr.items()[0]

    mop = ruby_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            return get_all_vars(a) | get_all_vars(b)
        else:
            # BUG FIX: the original discarded this recursive result (no
            # `return`), so control fell through to the binary/unary
            # lookups and ended in "not a recognized operation"
            return get_all_vars(term)

    bop = ruby_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                output = set()
                for a, b in term.items():
                    output |= get_all_vars(a)  # {k:v} k IS VARIABLE, v IS A VALUE
                return output
            else:
                a, b = term.items()[0]
                return get_all_vars(a)
        else:
            Log.error("Expecting binary term")

    uop = ruby_unary_operators.get(op)
    if uop:
        return get_all_vars(term)

    cop = complex_operators.get(op)
    if cop:
        return cop(op, term).vars()

    Log.error("`{{op}}` is not a recognized operation", op=op)
def wrap(query, schema=None):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    Turn a raw query (dict-like) into a fully-populated QueryOp: resolves
    the `from` clause, normalizes select/edges/groupby/where/window/sort,
    and clamps the limit.  QueryOp and None inputs pass through.
    """
    if isinstance(query, QueryOp) or query == None:
        return query

    # NOTE(review): this inner wrap() presumably resolves to the dot-wrap
    # helper (this def appears to live in a class namespace); if it were
    # this same function it would recurse forever — confirm
    query = wrap(query)

    output = QueryOp("from", None)
    output.format = query.format
    output.frum = wrap_from(query["from"], schema=schema)
    # a Schema source can stand in for a missing schema parameter
    if not schema and isinstance(output.frum, Schema):
        schema = output.frum

    if query.select:
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    else:
        if query.edges or query.groupby:
            # aggregation query without select defaults to a count
            output.select = Dict(name="count", value=jx_expression("."), aggregate="count", default=0)
        else:
            output.select = _normalize_selects(".", query["from"])

    # edges and groupby are mutually exclusive
    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = _normalize_edges(query.edges, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where(query.where, schema=schema)
    output.window = [_normalize_window(w) for w in listwrap(query.window)]
    output.having = None
    output.sort = _normalize_sort(query.sort)
    # clamp the limit to [0, MAX_LIMIT]
    output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    if not Math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean

    return output
def get_all_vars(expr):
    """
    Return the set of variable (json path) names referenced by the given
    expression.  Literals contribute nothing; operator clauses are
    searched recursively according to their operator class.
    """
    if expr == None:
        return set()
    elif isinstance(expr, unicode):
        if expr == "." or is_keyword(expr):
            return set([expr])
        else:
            Log.error("Expecting a json path")
    elif expr is True:
        return set()
    elif expr is False:
        return set()
    elif Math.is_number(expr):
        return set()

    op, term = expr.items()[0]

    mop = ruby_multi_operators.get(op)
    if mop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            a, b = term.items()[0]
            return get_all_vars(a) | get_all_vars(b)
        else:
            # BUG FIX: the original discarded this recursive result (no
            # `return`), so control fell through to the binary/unary
            # lookups and ended in "not a recognized operation"
            return get_all_vars(term)

    bop = ruby_binary_operators.get(op)
    if bop:
        if isinstance(term, list):
            output = set()
            for t in term:
                output |= get_all_vars(t)
            return output
        elif isinstance(term, Mapping):
            if op == "eq":
                output = set()
                for a, b in term.items():
                    output |= get_all_vars(a)  # {k:v} k IS VARIABLE, v IS A VALUE
                return output
            else:
                a, b = term.items()[0]
                return get_all_vars(a)
        else:
            Log.error("Expecting binary term")

    uop = ruby_unary_operators.get(op)
    if uop:
        return get_all_vars(term)

    cop = complex_operators.get(op)
    if cop:
        return cop(op, term).vars()

    Log.error("`{{op}}` is not a recognized operation", op=op)
def value2query(value):
    """
    Convert a value to its query-literal form: datetimes and Durations
    become milliseconds, numbers pass through, everything else is quoted
    as a string.
    """
    if isinstance(value, datetime):
        return convert.datetime2milli(value)
    elif isinstance(value, Duration):
        return value.milli
    elif Math.is_number(value):
        return value
    else:
        return convert.string2quote(value)
def Viterbi_precompute(mo, o, len, v):
    """
    Fill v.log_in_a and v.log_b with the logs of the model's transition
    and emission probabilities.  Zero probabilities get the sentinel +1
    (never a valid log-probability), which later stages ignore.
    """
    # log of each incoming transition probability a_ij
    for state in range(mo.N):
        incoming = mo.s[state].in_a
        for src in range(mo.N):
            p = incoming[src]
            if p != 0.0:  # DBL_EPSILON ?
                v.log_in_a[state][src] = Math.log(p)
            else:
                v.log_in_a[state][src] = +1  # Not used any further in the calculations

    # log of each emission probability b_j(o_t)
    for state in range(mo.N):
        emission = mo.s[state].b
        for sym in range(mo.M):
            p = emission[sym]
            if p != 0.0:  # DBL_EPSILON ?
                v.log_b[state][sym] = Math.log(p)
            else:
                v.log_b[state][sym] = +1
def floor(self, interval=None):
    """
    Round this Duration down to the nearest multiple of `interval`
    (a Duration).  Month-based intervals floor on months; otherwise
    floor on milliseconds.
    """
    if not isinstance(interval, Duration):
        from pyLibrary.debugs.logs import Log
        Log.error("Expecting an interval as a Duration object")

    output = Duration(0)
    if interval.month:
        if self.month:
            # floor the month count to a multiple of interval.month
            output.month = int(Math.floor(self.month / interval.month) * interval.month)
            output.milli = output.month * MILLI_VALUES.month
            return output

        # A MONTH OF DURATION IS BIGGER THAN A CANONICAL MONTH
        # no explicit months: convert millis to whole months (12 per year)
        # before flooring
        output.month = int(Math.floor(self.milli * 12 / MILLI_VALUES["year"] / interval.month) * interval.month)
        output.milli = output.month * MILLI_VALUES.month
    else:
        # plain milli-based interval
        output.milli = Math.floor(self.milli / (interval.milli)) * (interval.milli)

    return output
def kbest_buildLogMatrix(s, N):
    """
    Build the N x N matrix of log transition probabilities:
    log_a[i][j] = log(s[i].in_a[j]).

    NOTE(review): unlike Viterbi_precompute, zero probabilities are not
    guarded here — confirm Math.log(0) behaves as intended upstream.
    """
    # create & initialize matrix:
    log_a = ARRAY_MALLOC(N)
    for row in range(N):
        log_a[row] = ARRAY_MALLOC(N)
        for col in range(N):
            log_a[row][col] = Math.log(s[row].in_a[col])
    return log_a
def find_keys(self, start, count, filter=None):
    """
    Return the set of bucket keys whose Version falls in
    [start, start + count).  The bucket listing is narrowed with a
    common prefix derived from `start`.

    NOTE(review): `filter` is accepted but never used — confirm it is
    intentional API surface.
    """
    # number of digits that vary across the range; the rest form the prefix
    # NOTE(review): count <= 1 makes log10(count - 1) blow up, and a
    # digits of 0 yields an empty prefix ([:-0] == [:0]), scanning the
    # whole bucket — confirm callers always pass count >= 2
    digits = int(Math.ceiling(log10(count - 1)))
    prefix = unicode(start)[:-digits]
    metas = self.bucket.metas(prefix=prefix)
    min_ = Version(unicode(start))
    max_ = Version(unicode(start + count))
    # keep only keys inside the half-open version range
    output = [m.key for m in metas if min_ <= Version(m.key) < max_]
    return set(output)
def sviterbi_precompute(smo, O, T, v):
    """
    Fill v.log_b with log(b_j(O_t)) for every state j and time t.
    Impossible emissions (density == 0) are stored as -DBL_MAX rather
    than -infinity.
    """
    # Precomputing of Math.log(b_j(O_t))
    for t in range(T):
        for state in range(smo.N):
            density = smo.s[state].calc_b(O[t])
            if density == 0.0:  # DBL_EPSILON ?
                v.log_b[state][t] = -DBL_MAX
            else:
                v.log_b[state][t] = Math.log(density)
def pop_message(self, wait=SECOND, till=None):
    """
    RETURN THE MESSAGE, CALLER IS RESPONSIBLE FOR CALLING delete_message() WHEN DONE
    Returns None when the queue is empty within the wait period.
    """
    message = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
    if not message:
        return None
    return convert.json2value(message.get_body())