def __init__(self, **desc):
    desc = wrap(desc)
    self._set_slots_to_null(self.__class__)
    set_default(self, desc)
    self.name = coalesce(desc.name, desc.type)
    self.isFacet = coalesce(desc.isFacet, False)
    self.dimension = Null

def _range_composer(edge, domain, es_query, to_float):
    # USE RANGES
    _min = coalesce(domain.min, MAX(domain.partitions.min))
    _max = coalesce(domain.max, MAX(domain.partitions.max))

    if isinstance(edge.value, Variable):
        calc = {"field": edge.value.var}
    else:
        calc = {"script_field": edge.value.to_ruby()}

    if edge.allowNulls:  # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        missing_filter = set_default(
            {"filter": {"or": [
                OrOp("or", [
                    InequalityOp("lt", [edge.value, Literal(None, to_float(_min))]),
                    InequalityOp("gte", [edge.value, Literal(None, to_float(_max))]),
                ]).to_esfilter(),
                edge.value.missing().to_esfilter()
            ]}},
            es_query
        )
    else:
        missing_filter = None

    return wrap({"aggs": {
        "_match": set_default(
            {"range": calc},
            {"range": {"ranges": [{"from": to_float(p.min), "to": to_float(p.max)} for p in domain.partitions]}},
            es_query
        ),
        "_missing": missing_filter
    }})

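# Sketch (not from the original source) of the aggregation shape returned
# above, for the Variable case with allowNulls=True:
#
# {"aggs": {
#     "_match": {"range": {
#         "field": <edge.value.var>,
#         "ranges": [{"from": <p.min>, "to": <p.max>}, ...]
#     }},
#     "_missing": {"filter": {"or": [<value outside [min, max)>, <value missing>]}}
# }}
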
def Stats2ZeroMoment(stats):
    # MODIFIED FROM http://statsmodels.sourceforge.net/devel/_modules/statsmodels/stats/moment_helpers.html
    # ADDED count
    mc0, mc1, mc2, skew, kurt = stats.count, coalesce(stats.mean, 0), coalesce(stats.variance, 0), coalesce(stats.skew, 0), coalesce(stats.kurtosis, 0)

    mz0 = mc0
    mz1 = mc1 * mc0
    mz2 = (mc2 + mc1 * mc1) * mc0
    mc3 = skew * (mc2 ** 1.5)  # 3rd central moment
    mz3 = (mc3 + 3 * mc1 * mc2 + mc1 ** 3) * mc0  # 3rd non-central moment
    mc4 = (kurt + 3.0) * (mc2 ** 2.0)  # 4th central moment
    mz4 = (mc4 + 4 * mc1 * mc3 + 6 * mc1 * mc1 * mc2 + mc1 ** 4) * mc0

    m = ZeroMoment(mz0, mz1, mz2, mz3, mz4)
    if DEBUG:
        from pyLibrary.testing.fuzzytestcase import assertAlmostEqualValue

        globals()["DEBUG"] = False
        try:
            v = ZeroMoment2Stats(m)
            assertAlmostEqualValue(v.count, stats.count, places=10)
            assertAlmostEqualValue(v.mean, stats.mean, places=10)
            assertAlmostEqualValue(v.variance, stats.variance, places=10)
            assertAlmostEqualValue(v.skew, stats.skew, places=10)
            assertAlmostEqualValue(v.kurtosis, stats.kurtosis, places=10)
        except Exception as e:
            v = ZeroMoment2Stats(m)
            Log.error("programmer error", cause=e)
        globals()["DEBUG"] = True
    return m

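# Hypothetical usage sketch (not from the original source): any object with
# count/mean/variance/skew/kurtosis attributes works, so a wrap()ed dict will do.
def _example_stats_roundtrip():
    stats = wrap({"count": 10, "mean": 2.0, "variance": 4.0, "skew": 0, "kurtosis": 0})
    m = Stats2ZeroMoment(stats)
    # mz1 = mean*count = 20;  mz2 = (variance + mean**2)*count = 80
    return ZeroMoment2Stats(m)  # EXPECTED TO RECOVER THE INPUT STATS
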
def single(col, r):
    min = coalesce(r["gte"], r[">="])
    max = coalesce(r["lte"], r["<="])
    if min and max:
        # SPECIAL CASE (BETWEEN)
        return db.quote_column(col) + SQL(" BETWEEN ") + db.quote_value(min) + SQL(" AND ") + db.quote_value(max)
    else:
        return " AND ".join(
            db.quote_column(col) + name2sign[sign] + db.quote_value(value)
            for sign, value in r.items()
        )

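# Hypothetical usage sketch: with both "gte" and "lte" present the clause
# collapses to BETWEEN; otherwise each sign is rendered via name2sign.
# NOTE: a zero bound is falsy, so {"gte": 0, "lte": 10} also takes the join path.
def _example_single():
    between = single("build.date", {"gte": 1, "lte": 10})
    # -> <quoted column> BETWEEN 1 AND 10
    open_ended = single("build.date", {"gte": 1})
    # -> <quoted column> >= 1   (exact operator text comes from name2sign)
    return between, open_ended
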
def percent(value, decimal=None, digits=None, places=None):
    value = float(value)
    if value == 0.0:
        return "0%"

    digits = coalesce(digits, places)
    if digits != None:
        left_of_decimal = int(math.ceil(math.log10(abs(value)))) + 2
        decimal = digits - left_of_decimal

    decimal = coalesce(decimal, 0)
    right_of_decimal = max(decimal, 0)
    format = "{:." + _unicode(right_of_decimal) + "%}"
    # ROUND TO decimal+2 FRACTION DIGITS: THE "%" FORMAT MULTIPLIES BY 100
    return format.format(__builtin__.round(value, decimal + 2))

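# Usage sketch: digits counts significant digits of the rendered percentage;
# decimal counts places after its decimal point.
def _example_percent():
    assert percent(0.123456) == "12%"                # decimal DEFAULTS TO 0
    assert percent(0.123456, decimal=2) == "12.35%"
    assert percent(0.123456, digits=3) == "12.3%"
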
def send_email(self,
    from_address=None,
    to_address=None,
    subject=None,
    text_data=None,
    html_data=None
):
    """Sends an email.

    from_address is an email address; to_address is a list of email addresses.
    Addresses can be plain (e.g. "*****@*****.**") or with real names
    (e.g. "John Smith <*****@*****.**>").

    text_data and html_data are both strings.  You can specify one or both.
    If you specify both, the email will be sent as a MIME multipart
    alternative, i.e., the recipient will see the HTML content if his viewer
    supports it; otherwise he'll see the text content.
    """
    settings = self.settings

    from_address = coalesce(from_address, settings["from"], settings.from_address)
    to_address = listwrap(coalesce(to_address, settings.to_address, settings.to_addrs))

    if not from_address or not to_address:
        raise Exception("Both from_address and to_address must be specified")
    if not text_data and not html_data:
        raise Exception("Must specify either text_data or html_data")

    if not html_data:
        msg = MIMEText(text_data)
    elif not text_data:
        msg = MIMEText(html_data, 'html')
    else:
        msg = MIMEMultipart('alternative')
        msg.attach(MIMEText(text_data, 'plain'))
        msg.attach(MIMEText(html_data, 'html'))

    msg['Subject'] = coalesce(subject, settings.subject)
    msg['From'] = from_address
    msg['To'] = ', '.join(to_address)

    if self.server:
        # CALL AS PART OF A SMTP SESSION
        self.server.sendmail(from_address, to_address, msg.as_string())
    else:
        # CALL AS STAND-ALONE
        with self:
            self.server.sendmail(from_address, to_address, msg.as_string())

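# Hypothetical usage sketch (instance and addresses are illustrative only):
def _example_send_email(emailer):
    emailer.send_email(
        subject="nightly report",
        to_address=["ops@example.com"],  # listwrap() ALSO ACCEPTS A SINGLE STRING
        text_data="all green",
        html_data="<b>all green</b>"     # BOTH PARTS -> multipart/alternative
    )
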
def _convert_edge(self, edge):
    if isinstance(edge, basestring):
        return Data(
            name=edge,
            value=edge,
            domain=self._convert_domain()
        )
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        if isinstance(edge.value, (Mapping, list)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = self._convert_domain()
            domain.dimension = Data(fields=edge.value)

            return Data(
                name=edge.name,
                allowNulls=False if edge.allowNulls is False else True,
                domain=domain
            )

        domain = self._convert_domain(edge.domain)
        return Data(
            name=coalesce(edge.name, edge.value),
            value=edge.value,
            range=edge.range,
            allowNulls=False if edge.allowNulls is False else True,
            domain=domain
        )

def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif Math.is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_keyword(expr):
        return coalesce(self.dimensions[expr], expr)
    elif isinstance(expr, basestring):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif isinstance(expr, QueryOp):
        return self._convert_query(expr)
    elif isinstance(expr, Mapping):
        if expr["from"]:
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return wrap({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return converter_map.get(k, self._convert_bop)(self, k, v)
    elif isinstance(expr, (list, set, tuple)):
        return wrap([self.convert(value) for value in expr])
    else:
        return expr

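# Hypothetical sketch of the name -> value expansion: given an instance whose
# dimensions map "product" to "build.product",
def _example_convert(converter):
    converter.convert("product")         # -> "build.product" (keyword lookup)
    converter.convert(42)                # -> 42 (numbers pass through)
    converter.convert(["product", "."])  # -> ["build.product", "."] (element-wise)
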
def compileDuration2Term(edge):
    if edge.esscript:
        Log.error("edge script not supported yet")

    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if isKeyword(value):
        value = "doc[\"" + value + "\"].value"

    ref = coalesce(edge.domain.min, edge.domain.max, durations.ZERO)
    nullTest = compileNullTest(edge)

    ms = edge.domain.interval.milli
    if edge.domain.interval.month > 0:
        ms = durations.YEAR.milli / 12 * edge.domain.interval.month

    # CONVERT NUMBERS TO TEXT BEFORE CONCATENATING (AVOID TypeError ON str + int)
    partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + unicode(ms) + ")"
    partition2int = "((" + nullTest + ") ? " + unicode(numPartitions) + " : " + partition2int + ")"

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)

def __init__(self, edge, query, limit):
    AggsDecoder.__init__(self, edge, query, limit)
    self.domain = edge.domain
    self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
    self.parts = list()
    self.key2index = {}
    self.computed_domain = False

def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
    """
    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    self.es = Cluster(settings).get_or_create_index(
        schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        tjson=True,
        settings=settings
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(settings.alias, settings.index))
    self.queue = Queue("debug logs to es", max=max_size, silent=True)

    self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
    self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
    Thread.run("add debug logs to es", self._insert_loop)

def query(self, sql, param=None):
    """
    RETURN LIST OF dicts
    """
    self._execute_backlog()
    try:
        old_cursor = self.cursor
        if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
            self.cursor = self.db.cursor()
            self.cursor.execute("SET TIME_ZONE='+00:00'")
            self.cursor.close()
            self.cursor = self.db.cursor()

        if param:
            sql = expand_template(sql, self.quote_param(param))
        sql = self.preamble + outdent(sql)
        if self.debug:
            Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

        self.cursor.execute(sql)
        columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
        fixed = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
        result = convert.table2list(columns, fixed)

        if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
            self.cursor.close()
            self.cursor = None

        return result
    except Exception as e:
        if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
            Log.error("Did you close the db connection?", e)
        Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)

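# Hypothetical usage sketch (table/column names are illustrative):
def _example_query(db):
    # param VALUES ARE QUOTED BY quote_param() THEN SUBSTITUTED BY expand_template()
    rows = db.query("SELECT name FROM person WHERE id > {{min_id}}", {"min_id": 10})
    return [r["name"] for r in rows]
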
def __init__(self, host, index, alias=None, name=None, port=9200, settings=None):
    global _elasticsearch
    if hasattr(self, "settings"):
        return

    from pyLibrary.queries.containers.list_usingPythonList import ListContainer
    from pyLibrary.env import elasticsearch as _elasticsearch

    self.settings = settings
    self.default_name = coalesce(name, alias, index)
    self.default_es = _elasticsearch.Cluster(settings=settings)
    self.todo = Queue("refresh metadata", max=100000, unique=True)

    self.es_metadata = Null
    self.last_es_metadata = Date.now() - OLD_METADATA

    self.meta = Data()
    table_columns = metadata_tables()
    column_columns = metadata_columns()
    self.meta.tables = ListContainer("meta.tables", [], wrap({c.name: c for c in table_columns}))
    self.meta.columns = ColumnList()
    self.meta.columns.insert(column_columns)
    self.meta.columns.insert(table_columns)
    # TODO: fix monitor so it does not bring down ES
    if ENABLE_META_SCAN:
        self.worker = Thread.run("refresh metadata", self.monitor)
    else:
        self.worker = Thread.run("refresh metadata", self.not_monitor)
    return

def es_terms(es, mvel, query):
    """
    RETURN LIST OF ALL EDGE QUERIES

    EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES
    WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION
    """
    if len(query.edges) == 2:
        return _es_terms2(es, mvel, query)

    select = listwrap(query.select)
    FromES = build_es_query(query)
    packed_term = compileEdges2Term(mvel, query.edges, wrap([]))
    for s in select:
        FromES.facets[s.name] = {
            "terms": {
                "field": packed_term.field,
                "script_field": packed_term.expression,
                "size": coalesce(query.limit, 200000),
            },
            "facet_filter": simplify_esfilter(query.where),
        }

    term2Parts = packed_term.term2parts

    data = es09.util.post(es, FromES, query.limit)

    # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS
    # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN
    for k, f in data.facets.items():
        for t in f.terms:
            term2Parts(t.term)

    # NUMBER ALL EDGES FOR jx INDEXING
    for f, e in enumerate(query.edges):
        e.index = f
        if e.domain.type in ["uid", "default"]:
            # e.domain.partitions = jx.sort(e.domain.partitions, "value")
            for p, part in enumerate(e.domain.partitions):
                part.dataIndex = p
            e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    output = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception as e:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    pass

def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
    """
    schema = query.frum
    output = FlatList()

    for e in wrap(coalesce(query.edges, query.groupby, [])):
        if e.value != None and not isinstance(e.value, NullOp):
            e = e.copy()
            vars_ = e.value.vars()
            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)
            e.value = e.value.map({schema[v].name: schema[v].es_column for v in vars_})
        elif e.range:
            e = e.copy()
            min_ = e.range.min
            max_ = e.range.max
            vars_ = min_.vars() | max_.vars()
            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)
            map_ = {schema[v].name: schema[v].es_column for v in vars_}
            e.range = {
                "min": min_.map(map_),
                "max": max_.map(map_)
            }
        elif e.domain.dimension:
            vars_ = e.domain.dimension.fields
            e.domain.dimension = e.domain.dimension.copy()
            e.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(e.domain.partitions.where):
            vars_ = set()
            for p in e.domain.partitions:
                vars_ |= p.where.vars()

        try:
            depths = set(len(schema[v].nested_path) - 1 for v in vars_)
            if -1 in depths:
                Log.error(
                    "Do not know of column {{column}}",
                    column=unwraplist([v for v in vars_ if schema[v] == None])
                )
            if len(depths) > 1:
                Log.error("expression {{expr}} spans tables, can not handle", expr=e.value)
            max_depth = Math.MAX(depths)
            while len(output) <= max_depth:
                output.append([])
        except Exception as cause:
            # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
            # (NAMED cause SO THE EDGE VARIABLE e IS NOT CLOBBERED BELOW)
            max_depth = 0
            output.append([])

        limit = 0
        output[max_depth].append(AggsDecoder(e, query, limit))
    return output

def es_setop(es, query):
    es_query, filters = es14.util.es_query_template(query.frum.name)
    set_default(filters[0], simplify_esfilter(query.where.to_esfilter()))
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = FlatList()

    return extract_rows(es, es_query, query)

def get_index(self, row):
    domain = self.edge.domain
    part = row[self.start]
    if part == None:
        return len(domain.partitions)

    f = coalesce(part["from"], part["key"])
    t = coalesce(part["to"], part["key"])
    if f == None or t == None:
        return len(domain.partitions)
    else:
        for p in domain.partitions:
            if p.min <= f < p.max:
                return p.dataIndex

    sample = part.copy()  # copy() WAS MISSING ITS PARENTHESES
    sample.buckets = None
    Log.error("Expecting to find {{part}}", part=sample)

def __init__(self, description, param=None, debug=True, silent=False):
    self.template = description
    self.param = wrap(coalesce(param, {}))
    self.debug = debug
    self.silent = silent
    self.start = 0
    self.end = 0
    self.interval = None

def start(cls, settings=None):
    """
    RUN ME FIRST TO SETUP THE THREADED LOGGING
    http://victorlin.me/2012/08/good-logging-practice-in-python/

    log        - LIST OF PARAMETERS FOR LOGGER(S)
    trace      - SHOW MORE DETAILS IN EVERY LOG LINE (default False)
    cprofile   - True==ENABLE THE C-PROFILER THAT COMES WITH PYTHON (default False)
                 USE THE LONG FORM TO SET THE FILENAME {"enabled": True, "filename": "cprofile.tab"}
    profile    - True==ENABLE pyLibrary SIMPLE PROFILING (default False) (eg with Profiler("some description"):)
                 USE THE LONG FORM TO SET FILENAME {"enabled": True, "filename": "profile.tab"}
    constants  - UPDATE MODULE CONSTANTS AT STARTUP (PRIMARILY INTENDED TO CHANGE DEBUG STATE)
    """
    global _Thread
    if not settings:
        return
    settings = wrap(settings)

    cls.settings = settings
    cls.trace = cls.trace | coalesce(settings.trace, False)
    if cls.trace:
        from pyLibrary.thread.threads import Thread as _Thread

    if settings.cprofile is False:
        settings.cprofile = {"enabled": False}
    elif settings.cprofile is True or (isinstance(settings.cprofile, Mapping) and settings.cprofile.enabled):
        if isinstance(settings.cprofile, bool):
            settings.cprofile = {"enabled": True, "filename": "cprofile.tab"}

        import cProfile

        cls.cprofiler = cProfile.Profile()
        cls.cprofiler.enable()

    if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
        from pyLibrary.debugs import profiles

        if isinstance(settings.profile, bool):
            profiles.ON = True
            settings.profile = {"enabled": True, "filename": "profile.tab"}

        if settings.profile.enabled:
            profiles.ON = True

    if settings.constants:
        constants.set(settings.constants)

    if settings.log:
        cls.logging_multi = TextLog_usingMulti()
        if cls.main_log:
            cls.main_log.stop()
        cls.main_log = TextLog_usingThread(cls.logging_multi)

        for log in listwrap(settings.log):
            Log.add_log(Log.new_instance(log))

    if settings.cprofile.enabled == True:
        Log.alert("cprofiling is enabled, writing to {{filename}}", filename=os.path.abspath(settings.cprofile.filename))

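# Hypothetical startup sketch using only the keys documented in the docstring
# above; the per-logger entry format is whatever Log.new_instance() accepts
# and is not shown here.
#
#     Log.start(settings={
#         "trace": True,
#         "cprofile": {"enabled": False},
#         "constants": {...},   # DOTTED MODULE-CONSTANT PATHS
#         "log": [...]
#     })
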
def select(self, fields):
    if isinstance(fields, Mapping):
        fields = fields.value

    if isinstance(fields, basestring):
        # RETURN LIST OF VALUES
        if len(split_field(fields)) == 1:
            if self.path[0] == fields:
                return [d[1] for d in self.data]
            else:
                return [d[0][fields] for d in self.data]
        else:
            keys = split_field(fields)
            depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
            short_key = keys[depth:]

            output = FlatList()
            _select1((wrap(d[depth]) for d in self.data), short_key, 0, output)
            return output

    if isinstance(fields, list):
        output = FlatList()

        meta = []
        for f in fields:
            if hasattr(f.value, "__call__"):
                meta.append((f.name, f.value))
            else:
                meta.append((f.name, functools.partial(lambda v, d: d[v], f.value)))

        for row in self._values():
            agg = Data()
            for name, f in meta:
                agg[name] = f(row)
            output.append(agg)

        return output

        # meta = []
        # for f in fields:
        #     keys = split_field(f.value)
        #     depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
        #     short_key = join_field(keys[depth:])
        #
        #     meta.append((f.name, depth, short_key))
        #
        # for row in self._data:
        #     agg = Data()
        #     for name, depth, short_key in meta:
        #         if short_key:
        #             agg[name] = row[depth][short_key]
        #         else:
        #             agg[name] = row[depth]
        #     output.append(agg)
        # return output

    Log.error("multiselect over FlatList not supported")

def getDomain(self, **kwargs):
    # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
    kwargs = wrap(kwargs)
    kwargs.depth = coalesce(kwargs.depth, len(self.fields) - 1 if isinstance(self.fields, list) else None)

    if not self.partitions and self.edges:
        # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP
        partitions = [
            {
                "name": v.name,
                "value": v.name,
                "where": v.where,
                "style": v.style,
                "weight": v.weight  # YO! WHAT DO WE *NOT* COPY?
            }
            for i, v in enumerate(self.edges)
            if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT) and v.where
        ]
        self.isFacet = True
    elif kwargs.depth == None:  # ASSUME self.fields IS A dict
        partitions = FlatList()
        for i, part in enumerate(self.partitions):
            if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                break
            partitions.append({
                "name": part.name,
                "value": part.value,
                "where": part.where,
                "style": coalesce(part.style, part.parent.style),
                "weight": part.weight  # YO! WHAT DO WE *NOT* COPY?
            })
    elif kwargs.depth == 0:
        partitions = [
            {
                "name": v.name,
                "value": v.value,
                "where": v.where,
                "style": v.style,
                "weight": v.weight  # YO! WHAT DO WE *NOT* COPY?
            }
            for i, v in enumerate(self.partitions)
            if i < coalesce(self.limit, DEFAULT_QUERY_LIMIT)
        ]
    elif kwargs.depth == 1:
        partitions = FlatList()
        rownum = 0
        for i, part in enumerate(self.partitions):
            if i >= coalesce(self.limit, DEFAULT_QUERY_LIMIT):
                continue
            rownum += 1
            try:
                for j, subpart in enumerate(part.partitions):
                    partitions.append({
                        "name": join_field(split_field(subpart.parent.name) + [subpart.name]),
                        "value": subpart.value,
                        "where": subpart.where,
                        "style": coalesce(subpart.style, subpart.parent.style),
                        "weight": subpart.weight  # YO! WHAT DO WE *NOT* COPY?
                    })
            except Exception as e:
                Log.error("", e)

def normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """
    if not sort:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if isinstance(s, basestring) or Math.is_integer(s):
            output.append({"value": s, "sort": 1})
        elif not s.field and not s.value and s.sort == None:
            # ASSUME {name: sort} FORM
            for n, v in s.items():
                output.append({"value": n, "sort": sort_direction[v]})
        else:
            output.append({"value": coalesce(s.field, s.value), "sort": coalesce(sort_direction[s.sort], 1)})
    return wrap(output)

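# Hypothetical sketch: each accepted shorthand becomes the canonical
# [{"value": ..., "sort": <direction>}] form (directions per sort_direction).
def _example_normalize_sort():
    normalize_sort("a")                           # -> [{"value": "a", "sort": 1}]
    normalize_sort({"a": "desc"})                 # -> [{"value": "a", "sort": sort_direction["desc"]}]
    normalize_sort([{"field": "a", "sort": -1}])  # "field" IS ACCEPTED AS ALIAS OF "value"
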
def _convert_window(self, window):
    return Data(
        name=coalesce(window.name, window.value),
        value=window.value,
        edges=[self._convert_edge(e) for e in listwrap(window.edges)],
        sort=self._convert_sort(window.sort),
        aggregate=window.aggregate,
        range=self._convert_range(window.range),
        where=self._convert_where(window.where)
    )

def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Snagged from unittest/case.py, then modified (Aug2014)
    """
    if expected == None:  # None has no expectations
        return
    if test == expected:
        # shortcut
        return

    if not Math.is_number(expected):
        # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
        if isinstance(expected, list) and len(expected) == 0 and test == None:
            return
        if isinstance(expected, Mapping) and not expected.keys() and test == None:
            return
        if test != expected:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))
        return

    num_param = 0
    if digits != None:
        num_param += 1
    if places != None:
        num_param += 1
    if delta != None:
        num_param += 1
    if num_param > 1:
        raise TypeError("specify only one of digits, places or delta")

    if digits is not None:
        with suppress_exception:
            diff = Math.log10(abs(test - expected))
            if diff < digits:
                return

        standardMsg = expand_template("{{test}} != {{expected}} within {{digits}} decimal places", locals())
    elif delta is not None:
        if abs(test - expected) <= delta:
            return

        standardMsg = expand_template("{{test}} != {{expected}} within {{delta}} delta", locals())
    else:
        if places is None:
            places = 15

        with suppress_exception:
            diff = Math.log10(abs(test - expected))
            if diff < Math.ceiling(Math.log10(abs(test))) - places:
                return

        standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{places}} places", locals())

    raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")

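# Hypothetical usage sketch: digits/places/delta are mutually exclusive.
def _example_assert_almost_equal():
    assertAlmostEqualValue(3.141592, 3.141593, places=6)  # ~6 SIGNIFICANT DIGITS AGREE
    assertAlmostEqualValue(1000.1, 1000.2, delta=0.5)     # ABSOLUTE DIFFERENCE <= 0.5
    # digits BOUNDS THE EXPONENT OF THE DIFFERENCE: PASSES WHEN log10(|a-b|) < digits
    assertAlmostEqualValue(0.0010, 0.0011, digits=-3)
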
def add_alias(self, alias=None):
    alias = coalesce(alias, self.settings.alias)
    self.cluster_state = None
    self.cluster.post(
        "/_aliases",
        data={
            "actions": [
                {"add": {"index": self.settings.index, "alias": alias}}
            ]
        },
        timeout=coalesce(self.settings.timeout, 30)
    )

    # WAIT FOR ALIAS TO APPEAR
    while True:
        response = self.cluster.get("/_cluster/state", retry={"times": 5}, timeout=3)
        if alias in response.metadata.indices[self.settings.index].aliases:
            return
        Log.note("Waiting for alias {{alias}} to appear", alias=alias)
        Till(seconds=1).wait()

def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = convert.json2value(suite_json)
                suite = convert.value2json(coalesce(suite.fullname, suite.name))
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        value = convert.json2value(line)
        _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find('"resource_usage":') != -1:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False

def _worker(self, please_stop):
    if Sqlite.canonical:
        self.db = Sqlite.canonical
    else:
        self.db = sqlite3.connect(coalesce(self.filename, ':memory:'))

    try:
        full_path = File("pyLibrary/vendor/sqlite/libsqlitefunctions.so").abspath
        # self.db.execute("SELECT sqlite3_enable_load_extension(1)")
        self.db.enable_load_extension(True)
        self.db.execute("SELECT load_extension('" + full_path + "')")
    except Exception as e:
        Log.warning("loading sqlite extension functions failed, doing without. (no SQRT for you!)", cause=e)

def _all_combos(self):
    """
    RETURN AN ITERATOR OF ALL COORDINATES
    """
    combos = PRODUCT(self.dims)
    if not combos:
        return

    calc = [(coalesce(PRODUCT(self.dims[i + 1:]), 1), mm) for i, mm in enumerate(self.dims)]

    for c in xrange(combos):
        yield tuple(int(c / dd) % mm for dd, mm in calc)

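# Sketch (not from the original source): for dims == [2, 3] the coordinates
# come out row-major:
#     (0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)
# because axis i advances once every PRODUCT(dims[i+1:]) steps.
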
def __init__(self, **desc):
    Domain.__init__(self, **desc)
    desc = wrap(desc)

    self.type = "set"
    self.order = {}
    self.NULL = Null
    self.partitions = FlatList()

    if isinstance(self.key, set):
        Log.error("problem")

    if isinstance(desc.partitions[0], (int, float, basestring)):
        # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
        self.key = "value"
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            part = {"name": p, "value": p, "dataIndex": i}
            self.partitions.append(part)
            self.map[p] = part
            self.order[p] = i
    elif desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.dimension.fields)
    elif desc.partitions and isinstance(desc.key, (list, set)):
        # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
    elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping):
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
        # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
        # self.map = UniqueIndex(keys=self.key)
    elif desc.key == None:
        Log.error("Domains must have keys")
    elif self.key:
        self.key = desc.key
        self.map = dict()
        self.map[None] = self.NULL
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            self.map[p[self.key]] = p
            self.order[p[self.key]] = i
    elif all(p.esfilter for p in self.partitions):
        # EVERY PART HAS AN esfilter DEFINED, SO USE THEM
        for i, p in enumerate(self.partitions):
            p.dataIndex = i
    else:
        Log.error("Can not handle")

    self.label = coalesce(self.label, "name")

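# Hypothetical construction sketch (SimpleSetDomain class name assumed from
# context): a list of primitive partitions is promoted to
# {"name", "value", "dataIndex"} part objects keyed by "value".
#
#     d = SimpleSetDomain(partitions=["win", "linux", "mac"])
#     d.map["linux"]["dataIndex"]   # -> 1
#     d.order[None]                 # -> 3 (THE NULL PART SORTS LAST)
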
def _open(self):
    """ DO NOT USE THIS UNLESS YOU close() FIRST"""
    try:
        self.db = connect(
            host=self.settings.host,
            port=self.settings.port,
            user=coalesce(self.settings.username, self.settings.user),
            passwd=coalesce(self.settings.password, self.settings.passwd),
            db=coalesce(self.settings.schema, self.settings.db),
            charset=u"utf8",
            use_unicode=True,
            ssl=coalesce(self.settings.ssl, None)
        )
    except Exception as e:
        if self.settings.host.find("://") == -1:
            Log.error(
                u"Failure to connect to {{host}}:{{port}}",
                host=self.settings.host,
                port=self.settings.port,
                cause=e
            )
        else:
            Log.error(u"Failure to connect. PROTOCOL PREFIX IS PROBABLY BAD", e)
