def format_list(T, select, query=None):
    """
    CONVERT ROWS T INTO THE "list" RESPONSE FORMAT, USING select TO PULL VALUES
    """
    data = []
    if isinstance(query.select, list):
        # MANY SELECTS: ASSEMBLE A Data RECORD PER ROW; EMPTY RECORDS BECOME None
        for row in T:
            record = Data()
            for s in select:
                pulled = s.pull(row)
                record[s.put.name][s.put.child] = unwraplist(pulled)
            data.append(record if record else None)
    elif isinstance(query.select.value, LeavesOp):
        # SELECT OF LEAVES: SAME RECORD SHAPE AS THE MULTI-SELECT CASE
        for row in T:
            record = Data()
            for s in select:
                record[s.put.name][s.put.child] = unwraplist(s.pull(row))
            data.append(record if record else None)
    else:
        # SINGLE SELECT: child "." MEANS THE VALUE ITSELF, OTHERWISE BUILD A Data
        for row in T:
            record = None
            for s in select:
                value = unwraplist(s.pull(row))
                if value is None:
                    continue
                if s.put.child == ".":
                    record = value
                    continue
                if record is None:
                    record = Data()
                record[s.put.child] = value
            data.append(record)
    return Data(meta={"format": "list"}, data=data)
def format_list(T, select, query=None):
    # CONVERT ROWS T INTO THE "list" RESPONSE FORMAT, USING select TO PULL VALUES
    data = []
    if isinstance(query.select, list):
        # MULTIPLE SELECTS: ONE Data RECORD PER ROW; EMPTY RECORDS BECOME None
        for row in T:
            r = Data()
            for s in select:
                v = s.pull(row)
                r[s.put.name][s.put.child] = unwraplist(v)
            data.append(r if r else None)
    elif isinstance(query.select.value, LeavesOp):
        # SELECT OF LEAVES: SAME RECORD SHAPE AS THE MULTI-SELECT CASE
        for row in T:
            r = Data()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(s.pull(row))
            data.append(r if r else None)
    else:
        # SINGLE SELECT: child "." MEANS THE WHOLE VALUE, OTHERWISE BUILD A Data
        for row in T:
            r = None
            for s in select:
                v = unwraplist(s.pull(row))
                if v is None:
                    continue
                if s.put.child == ".":
                    r = v
                else:
                    if r is None:
                        r = Data()
                    r[s.put.child] = v
            data.append(r)
    return Data(
        meta={"format": "list"},
        data=data
    )
def _normalize_revision(self, r, found_revision, push, get_diff):
    # CONVERT A RAW hg CHANGESET RECORD r INTO A Revision, OPTIONALLY ATTACH
    # THE DIFF, AND (BEST-EFFORT) SAVE IT TO ES
    # WARN WHEN hg STARTS RETURNING PROPERTIES WE DO NOT KNOW ABOUT
    new_names = set(r.keys()) - {"rev", "node", "user", "description", "desc", "date", "files", "backedoutby", "parents", "children", "branch", "tags", "pushuser", "pushdate", "pushid", "phase", "bookmarks"}
    if new_names and not r.tags:
        Log.warning("hg is returning new property names ({{names}})", names=new_names)

    changeset = Changeset(
        id=r.node,
        id12=r.node[0:12],  # SHORT (12 CHAR) FORM OF THE CHANGESET HASH
        author=r.user,
        description=strings.limit(coalesce(r.description, r.desc), 2000),
        date=parse_hg_date(r.date),
        files=r.files,
        backedoutby=r.backedoutby if r.backedoutby else None,
        bug=self._extract_bug_id(r.description)
    )
    rev = Revision(
        branch=found_revision.branch,
        index=r.rev,
        changeset=changeset,
        parents=unwraplist(list(set(r.parents))),  # DEDUPE; SINGLE PARENT BECOMES SCALAR
        children=unwraplist(list(set(r.children))),
        push=push,
        phase=r.phase,
        bookmarks=unwraplist(r.bookmarks),
        etl={"timestamp": Date.now().unix, "machine": machine_metadata}
    )

    # BLANK OUT PROPERTIES ALREADY CAPTURED ABOVE, SO set_default ONLY
    # COPIES OVER THE PROPERTIES WE DID NOT HANDLE
    r.pushuser = None
    r.pushdate = None
    r.pushid = None
    r.node = None
    r.user = None
    r.desc = None
    r.description = None
    r.date = None
    r.files = None
    r.backedoutby = None
    r.parents = None
    r.children = None
    r.bookmarks = None
    set_default(rev, r)

    # ADD THE DIFF
    if get_diff or GET_DIFF:
        rev.changeset.diff = self._get_json_diff_from_hg(rev)

    try:
        # ID IS hash12-branch-locale; presumably unique per (changeset, branch) — TODO confirm
        _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
        with self.es_locker:
            self.es.add({"id": _id, "value": rev})
    except Exception as e:
        # SAVING TO ES IS BEST-EFFORT; THE REVISION IS STILL RETURNED
        Log.warning("did not save to ES", cause=e)

    return rev
def stop(self):
    """
    BLOCKS UNTIL ALL THREADS HAVE STOPPED
    """
    join_errors = []

    # SNAPSHOT THE CHILD LIST, THEN SIGNAL STOP IN REVERSE ORDER OF CREATION
    children = copy(self.children)
    for child in reversed(children):
        DEBUG and child.name and Log.note("Stopping thread {{name|quote}}", name=child.name)
        try:
            child.stop()
        except Exception as cause:
            join_errors.append(cause)

    # WAIT FOR EVERY CHILD TO FINISH, COLLECTING ANY JOIN FAILURES
    for child in children:
        DEBUG and child.name and Log.note("Joining on thread {{name|quote}}", name=child.name)
        try:
            child.join()
        except Exception as cause:
            join_errors.append(cause)
        DEBUG and child.name and Log.note("Done join on thread {{name|quote}}", name=child.name)

    if join_errors:
        Log.error("Problem while stopping {{name|quote}}", name=self.name, cause=unwraplist(join_errors))

    self.timers.stop()
    self.timers.join()
    DEBUG and Log.note("Thread {{name|quote}} now stopped", name=self.name)
def format_table(T, select, query=None):
    # CONVERT ROWS T INTO "table" FORMAT: A header LIST PLUS data AS ROW ARRAYS
    data = []
    num_columns = (MAX(select.put.index) + 1)
    for row in T:
        r = [None] * num_columns
        for s in select:
            value = unwraplist(row[s.pull])
            if value == None:  # NOTE(review): "==" presumably also matches project Null — confirm
                continue
            index, child = s.put.index, s.put.child
            if child == ".":
                # THE VALUE OCCUPIES THE WHOLE COLUMN
                r[index] = value
            else:
                # NESTED PROPERTY: ACCUMULATE INTO A Data AT THIS COLUMN
                if r[index] is None:
                    r[index] = Data()
                r[index][child] = value
        data.append(r)

    # BUILD THE HEADER; FIRST select WINS FOR EACH COLUMN INDEX
    header = [None] * num_columns
    for s in select:
        if header[s.put.index]:
            continue
        header[s.put.index] = s.name.replace("\\.", ".")  # UNESCAPE LITERAL DOTS
    return Data(
        meta={"format": "table"},
        header=header,
        data=data
    )
def wrap(cls, e, stack_depth=0):
    """
    ENSURE THE STACKTRACE AND CAUSAL CHAIN IS CAPTURED, PLUS ADD FEATURES OF Except

    :param e: AN EXCEPTION OF ANY TYPE
    :param stack_depth: HOW MANY CALLS TO TAKE OFF THE TOP OF THE STACK TRACE
    :return: A Except OBJECT OF THE SAME
    """
    if e == None:
        return Null
    elif isinstance(e, (list, Except)):
        # ALREADY WRAPPED (OR A LIST OF WRAPPED): PASS THROUGH
        return e
    elif is_data(e):
        # A DICT-LIKE SERIALIZED EXCEPTION: WRAP ITS CAUSES AND REBUILD
        e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
        return Except(**e)
    else:
        # A NATIVE EXCEPTION: PREFER ITS ATTACHED __traceback__ IF PRESENT
        tb = getattr(e, '__traceback__', None)
        if tb is not None:
            trace = _parse_traceback(tb)
        else:
            trace = _extract_traceback(0)

        cause = Except.wrap(getattr(e, '__cause__', None))
        if hasattr(e, "message") and e.message:
            output = Except(context=ERROR, template=text_type(e.message), trace=trace, cause=cause)
        else:
            output = Except(context=ERROR, template=text_type(e), trace=trace, cause=cause)

        trace = extract_stack(stack_depth + 2)  # +2 = to remove the caller, and it's call to this' Except.wrap()
        output.trace.extend(trace)
        return output
def list2cube(rows, column_names=None):
    """
    CONVERT rows (LIST OF dicts) INTO "cube" FORMAT: ONE VALUE-LIST PER COLUMN,
    WITH A SINGLE rownum EDGE
    """
    if column_names:
        keys = column_names
    else:
        # UNION OF ALL KEYS SEEN ACROSS ROWS
        seen = set()
        for record in rows:
            seen |= set(record.keys())
        keys = list(seen)

    # ONE PARALLEL LIST PER COLUMN, FILLED ROW BY ROW
    data = {k: [] for k in keys}
    for record in rows:
        for k in keys:
            data[k].append(unwraplist(record[k]))

    return wrap({
        "meta": {"format": "cube"},
        "edges": [{
            "name": "rownum",
            "domain": {"type": "rownum", "min": 0, "max": len(rows), "interval": 1}
        }],
        "data": data
    })
def wrap(cls, e, stack_depth=0):
    """
    ENSURE THE STACKTRACE AND CAUSAL CHAIN IS CAPTURED, PLUS ADD FEATURES OF Except

    :param e: AN EXCEPTION OF ANY TYPE
    :param stack_depth: HOW MANY CALLS TO TAKE OFF THE TOP OF THE STACK TRACE
    :return: A Except OBJECT OF THE SAME
    """
    if e == None:
        return Null
    elif isinstance(e, (list, Except)):
        # ALREADY WRAPPED (OR A LIST OF WRAPPED): PASS THROUGH
        return e
    elif isinstance(e, Mapping):
        # A DICT-LIKE SERIALIZED EXCEPTION: WRAP ITS CAUSES AND REBUILD
        e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
        return Except(**e)
    else:
        # A NATIVE EXCEPTION: PREFER ITS ATTACHED __traceback__ IF PRESENT
        tb = getattr(e, '__traceback__', None)
        if tb is not None:
            trace = _parse_traceback(tb)
        else:
            trace = _extract_traceback(0)

        cause = Except.wrap(getattr(e, '__cause__', None))
        if hasattr(e, "message") and e.message:
            output = Except(type=ERROR, template=text_type(e.message), trace=trace, cause=cause)
        else:
            output = Except(type=ERROR, template=text_type(e), trace=trace, cause=cause)

        trace = extract_stack(stack_depth + 2)  # +2 = to remove the caller, and it's call to this' Except.wrap()
        output.trace.extend(trace)
        return output
def _normalize_edge(edge, schema=None):
    """
    CONVERT AN EDGE (STRING, OR KNOWN SCHEMA COLUMN) INTO A NORMALIZED Data
    EDGE DESCRIPTION WITH name/value/domain

    :param edge: EDGE NAME, OR EXPRESSION
    :param schema: OPTIONAL SCHEMA TO LOOK UP KNOWN COLUMNS/DIMENSIONS
    """
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, basestring):
        if schema:
            try:
                e = schema[edge]
            except Exception as _:  # FIX: use py2.6+/py3-compatible `as` syntax (was `except Exception, _:`)
                e = None
            e = unwrap(unwraplist(e))
            if e and not isinstance(e, (_Column, set, list)):
                if isinstance(e, _Column):
                    # NOTE(review): this branch is unreachable — _Column was
                    # excluded by the enclosing condition; kept for safety
                    return Data(name=edge, value=jx_expression(edge), allowNulls=True, domain=_normalize_domain(domain=e, schema=schema))
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    # SINGLE-FIELD DIMENSION: USE ITS FIELD AS THE VALUE
                    return Data(name=e.name, value=jx_expression(e.fields[0]), allowNulls=True, domain=e.getDomain())
                else:
                    # MULTI-FIELD DIMENSION: DOMAIN CARRIES THE PARTITIONS
                    return Data(name=e.name, allowNulls=True, domain=e.getDomain())
        # NOT A KNOWN COLUMN: TREAT THE STRING AS AN EXPRESSION
        return Data(name=edge, value=jx_expression(edge), allowNulls=True, domain=_normalize_domain(schema=schema))
def format_table(T, select, query=None):
    """
    CONVERT ROWS T INTO "table" FORMAT: A header LIST PLUS data AS ROW ARRAYS
    """
    num_columns = (MAX(select.put.index) + 1)
    data = []
    for row in T:
        record = [None] * num_columns
        for s in select:
            value = unwraplist(row[s.pull])
            if value == None:
                continue
            i = s.put.index
            child = s.put.child
            if child == ".":
                # VALUE OCCUPIES THE WHOLE COLUMN
                record[i] = value
            elif record[i] is None:
                record[i] = Data()
                record[i][child] = value
            else:
                record[i][child] = value
        data.append(record)

    # FIRST select WINS FOR EACH HEADER SLOT; UNESCAPE LITERAL DOTS
    header = [None] * num_columns
    for s in select:
        if not header[s.put.index]:
            header[s.put.index] = s.name.replace("\\.", ".")

    return Data(meta={"format": "table"}, header=header, data=data)
def list2cube(rows, column_names=None):
    # CONVERT rows (LIST OF dicts) INTO "cube" FORMAT: ONE VALUE-LIST PER
    # COLUMN, WITH A SINGLE rownum EDGE
    if column_names:
        keys = column_names
    else:
        # UNION OF ALL KEYS SEEN ACROSS ROWS
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    # data IS SHARED WITH output; FILLED IN BELOW
    data = {k: [] for k in keys}
    output = wrap({
        "meta": {"format": "cube"},
        "edges": [
            {
                "name": "rownum",
                "domain": {"type": "rownum", "min": 0, "max": len(rows), "interval": 1}
            }
        ],
        "data": data
    })

    for r in rows:
        for k in keys:
            data[k].append(unwraplist(r[k]))

    return output
def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
    """
    schema = query.frum.schema
    output = FlatList()

    if query.edges:
        if query.sort and query.format != "cube":
            # REORDER EDGES/GROUPBY TO MATCH THE SORT
            query.edges = sort_edges(query, "edges")
    elif query.groupby:
        if query.sort and query.format != "cube":
            query.groupby = sort_edges(query, "groupby")

    for edge in wrap(coalesce(query.edges, query.groupby, [])):
        limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
        # COLLECT vars_ USED BY THIS EDGE, VALIDATING THEM AGAINST THE SCHEMA
        if edge.value != None and not isinstance(edge.value, NullOp):
            edge = edge.copy()
            vars_ = edge.value.vars()
            for v in vars_:
                if not schema.leaves(v, meta=True):
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.range:
            vars_ = edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ = edge.domain.dimension.fields
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(edge.domain.partitions.where):
            vars_ = set()
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()

        try:
            # FIND THE (SINGLE) NESTED DEPTH THIS EDGE LIVES AT
            vars_ |= edge.value.vars()
            depths = set(
                len(c.nested_path) - 1
                for v in vars_
                for c in schema.leaves(v))
            if -1 in depths:
                Log.error("Do not know of column {{column}}", column=unwraplist(
                    [v for v in vars_ if schema[v] == None]))
            if len(depths) > 1:
                Log.error(
                    "expression {{expr|quote}} spans tables, can not handle",
                    expr=edge.value)
            max_depth = MAX(depths)
            while len(output) <= max_depth:
                output.append([])
        except Exception as e:
            # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
            max_depth = 0
            output.append([])

        output[max_depth].append(AggsDecoder(edge, query, limit))
    return output
def __init__(self, context=ERROR, template=Null, params=Null, cause=Null, trace=Null, **_):
    """
    BUILD AN Except: WRAP THE CAUSE(S), AND CAPTURE A STACKTRACE WHEN NONE IS GIVEN
    """
    if context == None:
        raise ValueError("expecting context to not be None")

    # A COLLECTION OF CAUSES IS WRAPPED INDIVIDUALLY; A SINGLE CAUSE DIRECTLY
    self.cause = unwraplist([Except.wrap(c) for c in cause]) if is_many(cause) else Except.wrap(cause)

    Exception.__init__(self)
    LogItem.__init__(self, context=context, format=None, template=template, params=params)

    # NO TRACE SUPPLIED: CAPTURE THE CALLER'S STACK
    self.trace = trace if trace else get_stacktrace(2)
def warning(cls, template, default_params={}, cause=None, stack_depth=0, log_context=None, **more_params):
    """
    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # ALLOW Log.warning("msg", exception) SHORTHAND: AN EXCEPTION IN THE
    # PARAMS SLOT IS TREATED AS THE CAUSE
    if isinstance(default_params, BaseException):
        cause = default_params
        default_params = {}

    if "values" in more_params.keys():
        Log.error("Can not handle a logging parameter by name `values`")

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.WARNING, template, params, cause, trace)
    Log.note("{{error|unicode}}", error=e, log_context=set_default({"context": exceptions.WARNING}, log_context), stack_depth=stack_depth + 1)
def _get_schema_from_list(frum, table_name, prefix_path, nested_path, columns):
    """
    DEDUCE/MERGE COLUMN DEFINITIONS FROM A LIST OF RECORDS

    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param prefix_path: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return:
    """
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            # PRIMITIVE ROW: THE WHOLE prefix_path IS ONE COLUMN
            full_name = join_field(prefix_path)
            column = columns[full_name]
            if not column:
                column = Column(names={table_name: full_name}, es_column=full_name, es_index=".", type="undefined", nested_path=nested_path)
                columns.add(column)
            # MERGE THE OBSERVED TYPE INTO THE COLUMN'S RUNNING TYPE
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix_path + [name])
                column = columns[full_name]
                if not column:
                    column = Column(names={table_name: full_name}, es_column=full_name, es_index=".", type="undefined", nested_path=nested_path)
                    columns.add(column)
                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        # NOTE(review): single-element lists of objects are NOT
                        # promoted to "nested" here, unlike the multi-element
                        # branch below — presumably intentional; confirm
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    # RECURSE INTO THE INNER OBJECT AT THE SAME NESTED DEPTH
                    _get_schema_from_list([value], table_name, prefix_path + [name], nested_path, columns)
                elif this_type == "nested":
                    # A NESTED ARRAY ADDS A NEW (DEEPER) nested_path ENTRY
                    np = listwrap(nested_path)
                    newpath = unwraplist(
                        [join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, table_name, prefix_path + [name], newpath, columns)
def __data__(self):
    """
    RETURN THIS CONTAINER AS "list"-FORMATTED Data
    """
    rows = [
        {name: unwraplist(value) for name, value in row.items()}
        for row in self.data
    ]
    return wrap({"meta": {"format": "list"}, "data": rows})
def _get_schema_from_list(frum, table_name, prefix_path, nested_path, columns):
    """
    DEDUCE/MERGE COLUMN DEFINITIONS FROM A LIST OF RECORDS

    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param prefix_path: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return:
    """
    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            # PRIMITIVE ROW: THE WHOLE prefix_path IS ONE COLUMN
            full_name = join_field(prefix_path)
            column = columns[full_name]
            if not column:
                column = Column(
                    names={table_name: full_name},
                    es_column=full_name,
                    es_index=".",
                    type="undefined",
                    nested_path=nested_path
                )
                columns.add(column)
            # MERGE THE OBSERVED TYPE INTO THE COLUMN'S RUNNING TYPE
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix_path + [name])
                column = columns[full_name]
                if not column:
                    column = Column(
                        names={table_name: full_name},
                        es_column=full_name,
                        es_index=".",
                        type="undefined",
                        nested_path=nested_path
                    )
                    columns.add(column)
                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        # NOTE(review): single-element lists of objects are NOT
                        # promoted to "nested" here, unlike the multi-element
                        # branch below — presumably intentional; confirm
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    # RECURSE INTO THE INNER OBJECT AT THE SAME NESTED DEPTH
                    _get_schema_from_list([value], table_name, prefix_path + [name], nested_path, columns)
                elif this_type == "nested":
                    # A NESTED ARRAY ADDS A NEW (DEEPER) nested_path ENTRY
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0])+[name])]+np)
                    _get_schema_from_list(value, table_name, prefix_path + [name], newpath, columns)
def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
    """
    schema = query.frum.schema
    output = FlatList()

    if query.edges:
        if query.sort and query.format != "cube":
            # REORDER EDGES/GROUPBY TO MATCH THE SORT
            query.edges = sort_edges(query, "edges")
    elif query.groupby:
        if query.sort and query.format != "cube":
            query.groupby = sort_edges(query, "groupby")

    for edge in wrap(coalesce(query.edges, query.groupby, [])):
        limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
        # COLLECT vars_ USED BY THIS EDGE, VALIDATING THEM AGAINST THE SCHEMA
        if edge.value != None and not isinstance(edge.value, NullOp):
            edge = edge.copy()
            vars_ = edge.value.vars()
            for v in vars_:
                if not schema.leaves(v.var):
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.range:
            vars_ = edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v.var]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ = edge.domain.dimension.fields
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(edge.domain.partitions.where):
            vars_ = set()
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()

        try:
            # FIND THE (SINGLE) NESTED DEPTH THIS EDGE LIVES AT
            vars_ |= edge.value.vars()
            depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v.var))
            if -1 in depths:
                Log.error(
                    "Do not know of column {{column}}",
                    column=unwraplist([v for v in vars_ if schema[v] == None])
                )
            if len(depths) > 1:
                Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)
            max_depth = MAX(depths)
            while len(output) <= max_depth:
                output.append([])
        except Exception as e:
            # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
            max_depth = 0
            output.append([])

        output[max_depth].append(AggsDecoder(edge, query, limit))
    return output
def get_decoders_by_path(query, schema):
    """
    RETURN MAP FROM QUERY PATH TO LIST OF DECODER ARRAYS

    :param query:
    :return:
    """
    output = {}

    if query.edges:
        if query.sort and query.format != "cube":
            # REORDER EDGES/GROUPBY TO MATCH THE SORT
            query.edges = sort_edges(query, "edges")
    elif query.groupby:
        if query.sort and query.format != "cube":
            query.groupby = sort_edges(query, "groupby")

    for edge in to_data(coalesce(query.edges, query.groupby, [])):
        limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
        vars_ = coalesce(edge.value.vars(), set())
        # COLLECT ADDITIONAL vars_ DEPENDING ON THE EDGE KIND
        if edge.range:
            vars_ |= edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v.var]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ |= set(Variable(v) for v in edge.domain.dimension.fields)
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [
                schema[v.var].es_column for v in vars_
            ]
        elif edge.domain.partitions.where and all(
                edge.domain.partitions.where):
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()
        else:
            # SIMPLE edge.value
            decoder = AggsDecoder(edge, query, limit)
            depths = set(c.nested_path[0] for v in vars_ for c in schema.leaves(v.var))
            output.setdefault(first(depths), []).append(decoder)
            continue

        # BUCKET THE DECODER BY ITS (SINGLE) QUERY PATH
        depths = set(c.nested_path[0] for v in vars_ for c in schema.leaves(v.var))
        if not depths:
            Log.error("Do not know of column {{column}}", column=unwraplist(
                [v for v in vars_ if schema[v.var] == None]))
        if len(depths) > 1:
            Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)

        decoder = AggsDecoder(edge, query, limit)
        output.setdefault(first(depths), []).append(decoder)
    return output
def pull_nested_field(doc):
    # PULL THE inner_hits FOR name, PLACING EACH HIT'S VALUE AT ITS OWN INDEX
    hits = doc.get(pos, Null).inner_hits[name].hits.hits
    if not hits:
        return []
    acc = [None] * len(hits)
    for h in hits:
        acc[index(h)] = unwraplist(value(h))
    return acc
def format_object(doc):
    # CONVERT ONE doc INTO A Data RECORD; AN EMPTY RECORD BECOMES None
    record = Data()
    for s in select:
        value = unwraplist(s.pull(doc))
        if value is None:
            continue
        try:
            record[s.put.name][s.put.child] = value
        except Exception as e:
            Log.error("what's happening here?", cause=e)
    return record if record else None
def __data__(self):
    """
    RETURN THIS CONTAINER AS "list"-FORMATTED Data
    """
    # WHEN THERE IS ONLY THE "." COLUMN, ROWS ARE ALREADY PLAIN VALUES
    if first(self.schema.columns).name == '.':
        return wrap({"meta": {"format": "list"}, "data": self.data})

    rows = [{k: unwraplist(v) for k, v in row.items()} for row in self.data]
    return wrap({"meta": {"format": "list"}, "data": rows})
def get_decoders_by_path(query):
    """
    RETURN MAP FROM QUERY PATH TO LIST OF DECODER ARRAYS

    :param query:
    :return:
    """
    schema = query.frum.schema
    output = Data()

    if query.edges:
        if query.sort and query.format != "cube":
            # REORDER EDGES/GROUPBY TO MATCH THE SORT
            query.edges = sort_edges(query, "edges")
    elif query.groupby:
        if query.sort and query.format != "cube":
            query.groupby = sort_edges(query, "groupby")

    for edge in wrap(coalesce(query.edges, query.groupby, [])):
        limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
        # COLLECT vars_ USED BY THIS EDGE, VALIDATING THEM AGAINST THE SCHEMA
        if edge.value != None and not edge.value is NULL:
            edge = edge.copy()
            vars_ = edge.value.vars()
            for v in vars_:
                if not schema.leaves(v.var):
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.range:
            vars_ = edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v.var]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ = edge.domain.dimension.fields
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(edge.domain.partitions.where):
            vars_ = set()
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()

        # BUCKET THE DECODER BY ITS (SINGLE) QUERY PATH
        vars_ |= edge.value.vars()
        depths = set(c.nested_path[0] for v in vars_ for c in schema.leaves(v.var))
        if not depths:
            Log.error("Do not know of column {{column}}", column=unwraplist(
                [v for v in vars_ if schema[v] == None]))
        if len(depths) > 1:
            Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)

        decoder = AggsDecoder(edge, query, limit)
        output[literal_field(first(depths))] += [decoder]
    return output
def __exit__(self, exc_type, exc_val, exc_tb):
    # COMMIT ON CLEAN EXIT, ROLLBACK WHEN THE with-BODY RAISED; IF THE
    # COMMIT/ROLLBACK ITSELF FAILS, REPORT ALL ACCUMULATED CAUSES
    causes = []
    try:
        if isinstance(exc_val, Exception):
            causes.append(Except.wrap(exc_val))
            self.rollback()
        else:
            self.commit()
    except Exception as e:
        causes.append(Except.wrap(e))
        # Log.error RAISES, CARRYING BOTH THE ORIGINAL AND THE NEW FAILURE
        Log.error("Transaction failed", cause=unwraplist(causes))
def stop(self):
    """
    BLOCKS UNTIL ALL KNOWN THREADS, EXCEPT MainThread, HAVE STOPPED
    """
    global DEBUG

    self_thread = Thread.current()
    if self_thread != MAIN_THREAD or self_thread != self:
        Log.error("Only the main thread can call stop() on main thread")

    DEBUG = True
    self.please_stop.go()

    join_errors = []
    # SNAPSHOT CHILDREN UNDER LOCK; STOP IN REVERSE ORDER OF CREATION
    with self.child_locker:
        children = copy(self.children)
    for c in reversed(children):
        DEBUG and c.name and Log.note("Stopping thread {{name|quote}}", name=c.name)
        try:
            c.stop()
        except Exception as e:
            join_errors.append(e)

    # WAIT FOR EVERY CHILD TO FINISH, COLLECTING JOIN FAILURES
    for c in children:
        DEBUG and c.name and Log.note(
            "Joining on thread {{name|quote}}", name=c.name
        )
        try:
            c.join()
        except Exception as e:
            join_errors.append(e)

        DEBUG and c.name and Log.note(
            "Done join on thread {{name|quote}}", name=c.name
        )

    if join_errors:
        Log.error(
            "Problem while stopping {{name|quote}}",
            name=self.name,
            cause=unwraplist(join_errors),
        )

    # SHUT DOWN SHARED SERVICES EXACTLY ONCE
    with self.shutdown_locker:
        if self.stopped:
            return
        self.stop_logging()
        self.timers.stop()
        self.timers.join()
        write_profiles(self.cprofiler)
    DEBUG and Log.note("Thread {{name|quote}} now stopped", name=self.name)
    self.stopped.go()
def list2table(rows, column_names=None):
    """
    CONVERT rows (LIST OF dicts) INTO "table" FORMAT: A header LIST PLUS
    ROW ARRAYS ALIGNED WITH THAT HEADER

    :param rows: LIST OF dict RECORDS
    :param column_names: OPTIONAL EXPLICIT COLUMN ORDER (DUPLICATES REMOVED)
    """
    if column_names:
        # FIX: list(set(...)) MADE COLUMN ORDER NONDETERMINISTIC;
        # DEDUPE WHILE PRESERVING THE CALLER'S ORDER
        seen = set()
        keys = [k for k in column_names if not (k in seen or seen.add(k))]
    else:
        # UNION OF ALL KEYS SEEN ACROSS ROWS (ORDER UNSPECIFIED, AS BEFORE)
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    output = [[unwraplist(r.get(k)) for k in keys] for r in rows]

    return wrap({"meta": {"format": "table"}, "header": keys, "data": output})
def format_list(T, select, query=None):
    """
    CONVERT ROWS T INTO "list" FORMAT; EMPTY RECORDS BECOME None
    """
    data = []
    if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
        # NAMED SELECTS (OR SELECT OF LEAVES): PLACE VALUES UNDER put.name/put.child
        for row in T:
            record = Data()
            for s in select:
                record[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(record if record else None)
    else:
        # SINGLE SELECT: PLACE VALUES DIRECTLY UNDER put.child
        for row in T:
            record = Data()
            for s in select:
                record[s.put.child] = unwraplist(row[s.pull])
            data.append(record if record else None)
    return Data(meta={"format": "list"}, data=data)
def output(doc):
    # REBUILD THE NESTED ARRAY FROM inner_hits, PLACING EACH HIT AT ITS OFFSET
    acc = []
    for h in doc.inner_hits[name].hits.hits:
        i = h._nested.offset
        obj = Data()
        for f, v in h.fields.items():
            obj[untype_path(relative_field(f, nested_path))] = unwraplist(v)
        # GROW acc UNTIL INDEX i EXISTS
        while len(acc) <= i:
            acc.append(None)
        acc[i] = expr(obj)
    return acc
def scrub_args(args):
    """
    NORMALIZE REQUEST ARGUMENTS: COERCE NUMERIC STRINGS TO int/float, AND
    COLLAPSE SINGLE-ELEMENT LISTS TO SCALARS

    :param args: MAPPING FROM NAME TO VALUE(S)
    :return: wrapped dict OF SCRUBBED VALUES
    """
    output = {}
    for k, v in list(args.items()):
        scrubbed = []
        # FIX: inner loop previously reused `v`, shadowing the outer value
        for item in listwrap(v):
            if is_integer(item):
                scrubbed.append(int(item))
            elif is_number(item):
                scrubbed.append(float(item))
            else:
                scrubbed.append(item)
        output[k] = unwraplist(scrubbed)
    return wrap(output)
def format_list(T, select, query=None):
    """
    CONVERT ROWS T INTO THE "list" RESPONSE FORMAT, USING select TO PULL VALUES
    """
    data = []
    if is_list(query.select):
        # MULTIPLE SELECTS: ONE Data RECORD PER ROW; EMPTY RECORDS BECOME None
        for row in T:
            r = Data()
            for s in select:
                v = unwraplist(s.pull(row))
                if v is not None:
                    try:
                        r[s.put.name][s.put.child] = v
                    except Exception as e:
                        # FIX: FORWARD THE ORIGINAL EXCEPTION AS cause
                        # (IT WAS SILENTLY DROPPED BEFORE)
                        Log.error("what's happening here?", cause=e)
            data.append(r if r else None)
    elif is_op(query.select.value, LeavesOp):
        # SELECT OF LEAVES: SAME RECORD SHAPE AS THE MULTI-SELECT CASE
        for row in T:
            r = Data()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(s.pull(row))
            data.append(r if r else None)
    else:
        # SINGLE SELECT: child "." MEANS THE WHOLE VALUE, OTHERWISE BUILD A Data
        for row in T:
            r = None
            for s in select:
                v = unwraplist(s.pull(row))
                if v is None:
                    continue
                if s.put.child == ".":
                    r = v
                else:
                    if r is None:
                        r = Data()
                    r[s.put.child] = v
            data.append(r)
    return Data(
        meta={"format": "list"},
        data=data
    )
def output(doc):
    # REBUILD THE NESTED ARRAY FROM inner_hits, PLACING EACH HIT AT ITS OFFSET
    acc = []
    for h in doc.inner_hits[name].hits.hits:
        i = h._nested.offset
        obj = Data()
        for f, v in h.fields.items():
            # STRIP TYPE MARKERS AND THE NESTED PREFIX FROM THE FIELD NAME
            local_path = untype_path(relative_field(f, nested_path))
            obj[local_path] = unwraplist(v)
        # EXTEND THE LIST TO THE LENGTH WE REQUIRE
        for _ in range(len(acc), i+1):
            acc.append(None)
        acc[i] = expr(obj)
    return acc
def parse_sql(sql):
    # PARSE sql INTO A JSON QUERY EXPRESSION, EXTRACTING AGGREGATES AND
    # REMOVING SELECT COLUMNS THAT DUPLICATE THE GROUPBY
    # TODO: CONVERT tuple OF LITERALS INTO LITERAL LIST
    # # IF ALL MEMBERS OF A LIST ARE LITERALS, THEN MAKE THE LIST LITERAL
    # if all(isinstance(r, number_types) for r in output):
    #     pass
    # elif all(isinstance(r, number_types) or (is_data(r) and "literal" in r.keys()) for r in output):
    #     output = {"literal": [r['literal'] if is_data(r) else r for r in output]}
    query = wrap(moz_sql_parser.parse(sql))
    redundant_select = []

    # PULL OUT THE AGGREGATES
    for s in listwrap(query.select):
        val = s if s == '*' else s.value

        # EXTRACT KNOWN AGGREGATE FUNCTIONS
        if is_data(val):
            for a in KNOWN_SQL_AGGREGATES:
                value = val[a]
                if value != None:
                    if is_list(value):
                        # AGGREGATE WITH PARAMETERS EG percentile(value, 0.90)
                        s.aggregate = a
                        s[a] = unwraplist(value[1::])
                        s.value = value[0]
                    else:
                        # SIMPLE AGGREGATE
                        s.aggregate = a
                        s.value = value
                    break

        # LOOK FOR GROUPBY COLUMN IN SELECT CLAUSE, REMOVE DUPLICATION
        for g in listwrap(query.groupby):
            try:
                assertAlmostEqual(g.value, val, "")
                g.name = s.name
                redundant_select.append(s)
                break
            except Exception:
                # NOT A MATCH; KEEP LOOKING
                pass

    # REMOVE THE REDUNDANT select
    if is_list(query.select):
        for r in redundant_select:
            query.select.remove(r)
    elif query.select and redundant_select:
        query.select = None

    # RENAME orderby TO sort
    query.sort, query.orderby = query.orderby, None
    query.format = "table"
    return query
def format_value(doc):
    # COMBINE SELECTED VALUES FOR doc INTO A SINGLE VALUE (OR Data RECORD)
    result = None
    for s in select:
        value = unwraplist(s.pull(doc))
        if value is None:
            continue
        if s.put.child == ".":
            # "." MEANS THE VALUE ITSELF
            result = value
            continue
        if result is None:
            result = Data()
        result[s.put.child] = value
    return result
def _convert_edge(self, edge):
    """
    REPLACE edge.value WITH A KNOWN DIMENSION'S FIELD(S)/DOMAIN, IF ANY

    :param edge: EDGE DESCRIPTION
    :return: NEW EDGE (OR THE ORIGINAL, WHEN NO DIMENSION MATCHES)
    """
    dim = self.dimensions[edge.value]
    if not dim:
        return edge

    if len(listwrap(dim.fields)) == 1:
        # TODO: CHECK IF EDGE DOMAIN AND DIMENSION DOMAIN CONFLICT
        # FIX: REMOVED DEAD STATEMENT THAT FOLLOWED THIS return
        # (it assigned new_edge.domain after the function had exited)
        return set_default({"value": unwraplist(dim.fields)}, edge)

    # MULTI-FIELD DIMENSION: THE DOMAIN CARRIES THE PARTITIONS INSTEAD
    edge = copy(edge)
    edge.value = None
    edge.domain = dim.getDomain()
    return edge
def parse_sql(sql):
    # PARSE sql INTO A JSON QUERY EXPRESSION, EXTRACTING AGGREGATES AND
    # REMOVING SELECT COLUMNS THAT DUPLICATE THE GROUPBY
    query = wrap(moz_sql_parser.parse(sql))
    query.select = listwrap(query.select)
    redundant_select = []

    # PULL OUT THE AGGREGATES
    for s in query.select:
        val = s if s == '*' else s.value

        # EXTRACT KNOWN AGGREGATE FUNCTIONS
        if isinstance(val, Mapping):
            for a in KNOWN_SQL_AGGREGATES:
                value = val[a]
                if value != None:
                    s.aggregate = a
                    if isinstance(value, list):
                        # AGGREGATE WITH PARAMETERS EG percentile(value, 0.90)
                        s[a] = unwraplist(value[1::])
                        s.value = value[0]
                    elif isinstance(value, Mapping):
                        # EXPRESSION
                        if len(value.keys()) == 0:
                            s.value = None
                        else:
                            s.value = value
                    else:
                        # SIMPLE VALUE
                        s.value = value
                    break

        # LOOK FOR GROUPBY COLUMN IN SELECT CLAUSE, REMOVE DUPLICATION
        for g in listwrap(query.groupby):
            try:
                assertAlmostEqual(g.value, val, "")
                g.name = s.name
                redundant_select.append(s)
                break
            except Exception:
                # NOT A MATCH; KEEP LOOKING
                pass

    # REMOVE THE REDUNDANT select
    for r in redundant_select:
        query.select.remove(r)

    # RENAME orderby TO sort
    query.sort, query.orderby = query.orderby, None
    query.format = "table"
    return query
def format_row(doc):
    # BUILD ONE TABLE ROW, num_columns WIDE, FROM doc
    row = [None] * num_columns
    for s in select:
        value = unwraplist(s.pull(doc))
        if value == None:
            continue
        i = s.put.index
        child = s.put.child
        if child == ".":
            # VALUE OCCUPIES THE WHOLE COLUMN
            row[i] = value
        elif row[i] is None:
            row[i] = Data()
            row[i][child] = value
        else:
            row[i][child] = value
    return row
def list2table(rows, column_names=None):
    """
    CONVERT rows (LIST OF dicts) INTO "table" FORMAT: A header LIST PLUS
    ROW ARRAYS ALIGNED WITH THAT HEADER
    """
    if column_names:
        keys = list(set(column_names))
    else:
        # UNION OF ALL KEYS SEEN ACROSS ROWS
        all_columns = set()
        for record in rows:
            all_columns.update(record.keys())
        keys = list(all_columns)

    data = [[unwraplist(record.get(k)) for k in keys] for record in rows]

    return wrap({
        "meta": {"format": "table"},
        "header": keys,
        "data": data
    })
def wrap(cls, e, stack_depth=0):
    """
    ENSURE THE STACKTRACE AND CAUSAL CHAIN IS CAPTURED

    :param e: AN EXCEPTION OF ANY TYPE
    :param stack_depth: HOW MANY CALLS TO TAKE OFF THE TOP OF THE STACK TRACE
    :return: AN Except OBJECT
    """
    if e == None:
        return Null
    elif isinstance(e, (list, Except)):
        # ALREADY WRAPPED (OR A LIST OF WRAPPED): PASS THROUGH
        return e
    elif isinstance(e, Mapping):
        # A DICT-LIKE SERIALIZED EXCEPTION: WRAP ITS CAUSES AND REBUILD
        e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
        return Except(**e)
    else:
        # A NATIVE EXCEPTION: PREFER ITS message ATTRIBUTE WHEN SET
        if hasattr(e, "message") and e.message:
            cause = Except(ERROR, unicode(e.message), trace=_extract_traceback(0))
        else:
            cause = Except(ERROR, unicode(e), trace=_extract_traceback(0))

        trace = extract_stack(stack_depth + 2)  # +2 = to remove the caller, and it's call to this' Except.wrap()
        cause.trace.extend(trace)
        return cause
def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER
    """
    command = wrap(command)
    schema = self._es.get_schema()

    # GET IDS OF DOCUMENTS
    results = self._es.search({
        "fields": listwrap(schema._routing.path),
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": jx_expression(command.where).to_esfilter()
        }},
        "size": 200000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = FlatList()
    for k, v in command.set.items():
        if not is_variable_name(k):
            Log.error("Only support simple paths for now")
        if isinstance(v, Mapping) and v.doc:
            # PARTIAL-DOCUMENT UPDATE
            scripts.append({"doc": v.doc})
        else:
            # SCRIPTED ASSIGNMENT
            scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()})

    if results.hits.hits:
        # BUILD THE BULK REQUEST: ALTERNATING action/payload LINES, ONE PAIR
        # PER (DOCUMENT, SCRIPT)
        updates = []
        for h in results.hits.hits:
            for s in scripts:
                updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                updates.append(s)
        content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode('utf-8')
        response = self._es.cluster.post(
            self._es.path + "/_bulk",
            data=content,
            headers={"Content-Type": "application/json"},
            timeout=self.settings.timeout,
            params={"consistency": self.settings.consistency}
        )
        if response.errors:
            Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])
def fatal(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # pausible cause
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    SEND TO STDERR

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    # ALLOW Log.fatal("msg", exception) SHORTHAND: EXCEPTION(S) IN THE
    # PARAMS SLOT ARE TREATED AS THE CAUSE
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.ERROR, template, params, cause, trace)
    str_e = unicode(e)

    # AVOID RE-ENTRANT LOGGING: ONLY THE FIRST FATAL IN FLIGHT IS NOTED
    error_mode = cls.error_mode
    with suppress_exception:
        if not error_mode:
            cls.error_mode = True
            Log.note(
                "{{error|unicode}}",
                error=e,
                log_context=set_default({"context": exceptions.FATAL}, log_context),
                stack_depth=stack_depth + 1
            )
    cls.error_mode = error_mode

    sys.stderr.write(str_e.encode('utf8'))
def stop(self):
    """
    BLOCKS UNTIL ALL THREADS HAVE STOPPED
    THEN RUNS sys.exit(0)
    """
    global DEBUG

    self_thread = Thread.current()
    if self_thread != MAIN_THREAD or self_thread != self:
        Log.error("Only the main thread can call stop() on main thread")

    DEBUG = True
    self.please_stop.go()

    join_errors = []
    # SNAPSHOT CHILDREN UNDER LOCK; STOP IN REVERSE ORDER OF CREATION
    with self.child_lock:
        children = copy(self.children)
    for c in reversed(children):
        DEBUG and c.name and Log.note("Stopping thread {{name|quote}}", name=c.name)
        try:
            c.stop()
        except Exception as e:
            join_errors.append(e)

    # WAIT FOR EVERY CHILD TO FINISH, COLLECTING JOIN FAILURES
    for c in children:
        DEBUG and c.name and Log.note("Joining on thread {{name|quote}}", name=c.name)
        try:
            c.join()
        except Exception as e:
            join_errors.append(e)
        DEBUG and c.name and Log.note("Done join on thread {{name|quote}}", name=c.name)

    if join_errors:
        Log.error("Problem while stopping {{name|quote}}", name=self.name, cause=unwraplist(join_errors))

    # SHUT DOWN SHARED SERVICES, THEN EXIT THE PROCESS
    self.stop_logging()
    self.timers.stop()
    self.timers.join()
    write_profiles(self.cprofiler)
    DEBUG and Log.note("Thread {{name|quote}} now stopped", name=self.name)
    sys.exit(0)
def map_edge(e, map_):
    """
    RETURN A COPY OF EDGE e WITH EVERY EMBEDDED EXPRESSION MAPPED THROUGH map_
    (domain where, partition wheres, edge value, and optional range min/max)
    """
    mapped_parts = []
    for part in e.domain.partitions:
        mapped_parts.append(set_default({"where": part.where.map(map_)}, part))

    new_domain = copy(e.domain)
    new_domain.where = e.domain.where.map(map_)
    new_domain.partitions = unwraplist(mapped_parts)

    output = copy(e)
    output.value = e.value.map(map_)
    output.domain = new_domain
    if e.range:
        output.range.min = e.range.min.map(map_)
        output.range.max = e.range.max.map(map_)
    return output
def format_table(T, select, query=None):
    """
    CONVERT ROWS T INTO "table" FORMAT: A header LIST PLUS ROWS OF CELLS,
    ONE CELL PER s.put.index, WITH DOTTED CHILDREN NESTED INSIDE Data CELLS
    """
    num_columns = (MAX(select.put.index) + 1)

    data = []
    for row in T:
        cells = [None] * num_columns
        for s in select:
            value = unwraplist(s.pull(row))
            # NOTE: == None (NOT is None) IS DELIBERATE, SO mo-dot Null ALSO MATCHES
            if value == None:
                continue
            index, child = s.put.index, s.put.child
            if child == ".":
                cells[index] = value
            else:
                if cells[index] is None:
                    cells[index] = Data()
                cells[index][child] = value
        data.append(cells)

    header = [None] * num_columns
    if is_data(query.select) and not is_op(query.select.value, LeavesOp):
        for s in select:
            header[s.put.index] = s.name
    else:
        # FIRST SELECT TO CLAIM A COLUMN WINS ITS HEADER NAME
        for s in select:
            if header[s.put.index]:
                continue
            header[s.put.index] = s.name

    return Data(
        meta={"format": "table"},
        header=header,
        data=data
    )
def warning(
    cls,
    template,
    default_params={},
    cause=None,
    stack_depth=0,
    log_context=None,
    **more_params
):
    """
    EMIT A WARNING (LOGGED, NOT RAISED)

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param log_context: *dict* extra key:value pairs for your convenience
    :param more_params: *any more parameters (which will overwrite default_params)
    :return:
    """
    timestamp = datetime.utcnow()
    if not is_text(template):
        Log.error("Log.warning was expecting a unicode template")

    # CONSISTENT WITH Log.fatal/Log.error: ACCEPT AN EXCEPTION, OR A LIST OF EXCEPTIONS,
    # PASSED AS THE SECOND POSITIONAL ARGUMENT (THE ORIGINAL ONLY HANDLED A BARE EXCEPTION)
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    if "values" in more_params.keys():
        Log.error("Can not handle a logging parameter by name `values`")

    params = Data(dict(default_params, **more_params))
    cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
    trace = exceptions.extract_stack(stack_depth + 1)

    e = Except(exceptions.WARNING, template=template, params=params, cause=cause, trace=trace)
    Log._annotate(
        e,
        timestamp,
        stack_depth + 1
    )
def error(
    cls,
    template,  # human readable template
    default_params={},  # parameters for template
    cause=None,  # plausible cause
    stack_depth=0,
    **more_params
):
    """
    raise an exception with a trace for the cause too

    :param template: *string* human readable string with placeholders for parameters
    :param default_params: *dict* parameters to fill in template
    :param cause: *Exception* for chaining
    :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
    :param more_params: *any more parameters (which will overwrite default_params)
    :return: never returns; always raises Except
    """
    if not isinstance(template, unicode):
        # NOT RECOVERABLE: WRITE DIRECTLY TO STDERR, THEN RAISE VIA A (unicode) RECURSIVE CALL
        sys.stderr.write("Log.error was expecting a unicode template")
        Log.error("Log.error was expecting a unicode template")

    # ACCEPT AN EXCEPTION (OR LIST OF EXCEPTIONS) PASSED AS THE SECOND POSITIONAL ARGUMENT
    if default_params and isinstance(listwrap(default_params)[0], BaseException):
        cause = default_params
        default_params = {}

    params = dict(unwrap(default_params), **more_params)

    cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
    trace = exceptions.extract_stack(stack_depth + 1)

    # (REMOVED: dead `add_to_trace` flag that was hard-coded False, making its
    # trace-extension branch unreachable)
    e = Except(exceptions.ERROR, template, params, cause, trace)
    raise e
def __data__(self):
    """
    RETURN THIS CONTAINER IN "list" FORMAT, UNWRAPPING EVERY CELL VALUE
    """
    rows = []
    for row in self.data:
        rows.append({name: unwraplist(value) for name, value in row.items()})
    return wrap({
        "meta": {"format": "list"},
        "data": rows
    })
def _normalize_revision(self, r, found_revision, push, get_diff, get_moves):
    """
    CONVERT A RAW hg CHANGESET RECORD r INTO A Revision, SAVE IT TO ES (BEST EFFORT),
    AND RETURN IT

    :param r: raw changeset record, as returned by the hg web API
    :param found_revision: partially-populated Revision (supplies the branch)
    :param push: push metadata to attach to the Revision
    :param get_diff: *boolean* if True, also fetch the json diff
    :param get_moves: *boolean* if True, also fetch file-move information
    :return: the normalized Revision
    """
    # WARN WHEN hg STARTS RETURNING PROPERTY NAMES WE DO NOT KNOW ABOUT
    new_names = set(r.keys()) - KNOWN_TAGS
    if new_names and not r.tags:
        Log.warning(
            "hg is returning new property names {{names|quote}} for {{changeset}} from {{url}}",
            names=new_names,
            changeset=r.node,
            url=found_revision.branch.url
        )

    changeset = Changeset(
        id=r.node,
        id12=r.node[0:12],
        author=r.user,
        description=strings.limit(coalesce(r.description, r.desc), 2000),
        date=parse_hg_date(r.date),
        files=r.files,
        backedoutby=r.backedoutby if r.backedoutby else None,
        bug=self._extract_bug_id(r.description)
    )
    rev = Revision(
        branch=found_revision.branch,
        index=r.rev,
        changeset=changeset,
        parents=unwraplist(list(set(r.parents))),   # DEDUPE PARENTS
        children=unwraplist(list(set(r.children))), # DEDUPE CHILDREN
        push=push,
        phase=r.phase,
        bookmarks=unwraplist(r.bookmarks),
        landingsystem=r.landingsystem,
        etl={"timestamp": Date.now().unix, "machine": machine_metadata}
    )

    # NULL-OUT EVERY PROPERTY ALREADY COPIED, SO set_default() BELOW ONLY MERGES
    # THE PROPERTIES WE DID NOT HANDLE EXPLICITLY
    r.pushuser = None
    r.pushdate = None
    r.pushid = None
    r.node = None
    r.user = None
    r.desc = None
    r.description = None
    r.date = None
    r.files = None
    r.backedoutby = None
    r.parents = None
    r.children = None
    r.bookmarks = None
    r.landingsystem = None

    set_default(rev, r)

    # ADD THE DIFF
    if get_diff:
        rev.changeset.diff = self._get_json_diff_from_hg(rev)
    if get_moves:
        rev.changeset.moves = self._get_moves_from_hg(rev)

    try:
        _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
        with self.es_locker:
            self.es.add({"id": _id, "value": rev})
    except Exception as e:
        # BEST EFFORT: LOG, BACK OFF, AND STILL RETURN THE REVISION
        e = Except.wrap(e)
        Log.warning("Did not save to ES, waiting {{duration}} seconds", duration=WAIT_AFTER_NODE_FAILURE, cause=e)
        Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
        if "FORBIDDEN/12/index read-only" in e:
            pass  # KNOWN FAILURE MODE

    return rev
def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
    """
    INFER A SCHEMA BY SCANNING THE RECORDS IN A LIST, WIDENING COLUMN TYPES AS
    NEW VALUES ARE SEEN, AND RECURSING INTO NESTED OBJECTS AND ARRAYS

    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param parent: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return:
    """
    for d in frum:
        row_type = python_type_to_json_type[d.__class__]
        if row_type != "object":
            # EXPECTING PRIMITIVE VALUE
            full_name = parent
            column = columns[full_name]
            if not column:
                column = Column(
                    name=concat_field(table_name, full_name),
                    es_column=full_name,
                    es_index=".",
                    es_type=d.__class__.__name__,
                    jx_type=None,  # WILL BE SET BELOW
                    last_updated=Date.now(),
                    nested_path=nested_path,
                )
                columns.add(column)
            # WIDEN THE COLUMN TYPE TO COVER THIS VALUE TOO
            # NOTE(review): the class itself is passed here, while the class NAME is
            # passed in the object branch below — confirm _merge_python_type accepts both
            column.es_type = _merge_python_type(column.es_type, d.__class__)
            column.jx_type = python_type_to_json_type[column.es_type]
        else:
            for name, value in d.items():
                full_name = concat_field(parent, name)
                column = columns[full_name]
                if not column:
                    column = Column(
                        name=concat_field(table_name, full_name),
                        es_column=full_name,
                        es_index=".",
                        es_type=value.__class__.__name__,
                        jx_type=None,  # WILL BE SET BELOW
                        last_updated=Date.now(),
                        nested_path=nested_path,
                    )
                    columns.add(column)
                if is_container(value):  # GET TYPE OF MULTIVALUE
                    v = list(value)
                    if len(v) == 0:
                        this_type = none_type.__name__
                    elif len(v) == 1:
                        this_type = v[0].__class__.__name__
                    else:
                        # MIXED MEMBER TYPES: MERGE THEM INTO ONE (WIDER) TYPE
                        this_type = reduce(
                            _merge_python_type, (vi.__class__.__name__ for vi in value)
                        )
                else:
                    this_type = value.__class__.__name__
                column.es_type = _merge_python_type(column.es_type, this_type)
                column.jx_type = python_type_to_json_type[column.es_type]
                if this_type in {"object", "dict", "Mapping", "Data"}:
                    # RECURSE INTO A NESTED OBJECT (SAME nested_path)
                    _get_schema_from_list(
                        [value], table_name, full_name, nested_path, columns
                    )
                elif this_type in {"list", "FlatList"}:
                    # NESTED ARRAY: PREPEND THIS FIELD TO THE nested_path FOR THE RECURSION
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(
                        value, table_name, full_name, newpath, columns
                    )
def update(self, command):
    """
    APPLY A {"clear": ..., "set": ...} COMMAND TO EVERY COLUMN MATCHING command.where
    THE IN-MEMORY MAP IS CHANGED IMMEDIATELY; THE DATABASE WRITE IS QUEUED ON self.todo
    """
    self.dirty = True
    try:
        command = wrap(command)
        DEBUG and Log.note(
            "Update {{timestamp}}: {{command|json}}",
            command=command,
            timestamp=Date(command["set"].last_updated),
        )
        eq = command.where.eq
        if eq.es_index:
            if len(eq) == 1:
                if unwraplist(command.clear) == ".":
                    # CLEAR THE WHOLE INDEX: DROP IT LOCALLY AND QUEUE THE SQL DELETE
                    with self.locker:
                        del self.data[eq.es_index]
                    self.todo.add(
                        (
                            EXECUTE,
                            "DELETE FROM "
                            + db_table_name
                            + SQL_WHERE
                            + " es_index="
                            + quote_value(eq.es_index),
                        )
                    )
                    return

                # FASTEST: FILTER BY INDEX ONLY
                all_columns = self.data.get(eq.es_index, {}).values()
                with self.locker:
                    columns = [c for cs in all_columns for c in cs]
            elif eq.es_column and len(eq) == 2:
                # FASTER: FILTER BY INDEX AND COLUMN
                all_columns = self.data.get(eq.es_index, {}).values()
                with self.locker:
                    columns = [
                        c
                        for cs in all_columns
                        for c in cs
                        if c.es_column == eq.es_column
                    ]
            else:
                # SLOWER: GENERAL PROPERTY-BY-PROPERTY MATCH
                all_columns = self.data.get(eq.es_index, {}).values()
                with self.locker:
                    columns = [
                        c
                        for cs in all_columns
                        for c in cs
                        if all(
                            c[k] == v for k, v in eq.items()
                        )  # THIS LINE IS VERY SLOW
                    ]
        else:
            # NO es_index FILTER: SCAN ALL COLUMNS WITH A GENERIC jx FILTER
            columns = list(self)
            columns = jx.filter(columns, command.where)

        with self.locker:
            for col in columns:
                DEBUG and Log.note(
                    "update column {{table}}.{{column}}",
                    table=col.es_index,
                    column=col.es_column,
                )
                for k in command["clear"]:
                    if k == ".":
                        # DELETE THE COLUMN ENTIRELY, PRUNING EMPTY CONTAINERS ABOVE IT;
                        # break SKIPS THE for/else "set" STEP BELOW
                        self.todo.add((DELETE, col))
                        lst = self.data[col.es_index]
                        cols = lst[col.name]
                        cols.remove(col)
                        if len(cols) == 0:
                            del lst[col.name]
                        if len(lst) == 0:
                            del self.data[col.es_index]
                        break
                    else:
                        col[k] = None
                else:
                    # DID NOT DELETE COLUMN ("."), CONTINUE TO SET PROPERTIES
                    for k, v in command.set.items():
                        col[k] = v
                    self.todo.add((UPDATE, col))
    except Exception as e:
        Log.error("should not happen", cause=e)
def get_value_from_row(self, row):
    """
    EXTRACT THE INNER "|"-DELIMITED PARTS OF A COMPOSITE KEY
    ("||" IS AN ESCAPED LITERAL "|"; \b IS USED AS A TEMPORARY PLACEHOLDER)
    RETURNS None WHEN THERE ARE NO INNER PARTS
    """
    escaped = row[self.start]['key'].replace("||", "\b")
    parts = escaped.split("|")
    if len(parts) == 2:
        return None
    inner = [p.replace("\b", "|") for p in parts[1:-1]]
    return unwraplist(inner)
def selector(d):
    """
    APPLY EVERY (name, pull) PAIR IN push_and_pull TO RECORD d,
    RETURNING A PLAIN (UNWRAPPED) RESULT
    """
    result = Data()
    for name, pull in push_and_pull:
        result[name] = unwraplist(pull(wrap(d)))
    return unwrap(result)
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized edge
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param limit: maximum domain size (passed to _normalize_domain for column edges)
    :param schema: for context
    :return: a normalized edge (as a single-element list)
    """
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, text_type):
        if schema:
            leaves = unwraplist(list(schema.leaves(edge)))
            if not leaves or isinstance(leaves, (list, set)):
                # NO SINGLE MATCHING COLUMN: TREAT THE NAME AS A GENERAL EXPRESSION
                return [
                    Data(
                        name=edge,
                        value=jx_expression(edge, schema=schema),
                        allowNulls=True,
                        dim=dim_index
                    )
                ]
            elif isinstance(leaves, _Column):
                # EXACTLY ONE COLUMN: BUILD ITS DOMAIN FROM THE COLUMN ITSELF
                return [Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=_normalize_domain(domain=leaves, limit=limit, schema=schema)
                )]
            elif isinstance(leaves.fields, list) and len(leaves.fields) == 1:
                # DIMENSION WITH A SINGLE FIELD
                return [Data(
                    name=leaves.name,
                    value=jx_expression(leaves.fields[0], schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
            else:
                # MULTI-FIELD DIMENSION: NO SINGLE value EXPRESSION
                return [Data(
                    name=leaves.name,
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
        else:
            # NO SCHEMA: THE NAME CAN ONLY BE TREATED AS AN EXPRESSION
            return [
                Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index
                )
            ]
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, text_type):
            Log.error("You must name compound and complex edges: {{edge}}", edge=edge)

        if isinstance(edge.value, (list, set)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [Data(
                name=edge.name,
                value=jx_expression(edge.value, schema=schema),
                allowNulls=bool(coalesce(edge.allowNulls, True)),
                dim=dim_index,
                domain=domain
            )]

        domain = _normalize_domain(edge.domain, schema=schema)

        return [Data(
            name=coalesce(edge.name, edge.value),
            value=jx_expression(edge.value, schema=schema),
            range=_normalize_range(edge.range),
            allowNulls=bool(coalesce(edge.allowNulls, True)),
            dim=dim_index,
            domain=domain
        )]
def tuid_endpoint(path):
    """
    FLASK ENDPOINT: ANSWER A jx-STYLE QUERY ON THE `files` TABLE WITH TUID ANNOTATIONS
    RESPONDS 200 WHEN COMPLETE, 202 WHEN THE ANSWER IS PARTIAL (SERVICE BUSY OR
    STILL COMPUTING), 400 ON A BAD REQUEST OR INTERNAL FAILURE
    """
    with RegisterThread():
        try:
            service.statsdaemon.update_requests(requests_total=1)
            if flask.request.headers.get("content-length", "") in ["", "0"]:
                # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    EXPECTING_QUERY,
                    status=400,
                    headers={
                        "Content-Type": "text/html"
                    }
                )
            elif int(flask.request.headers["content-length"]) > QUERY_SIZE_LIMIT:
                # NOTE(review): an oversized request is counted as requests_passed — confirm intended
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    unicode2utf8("request too large"),
                    status=400,
                    headers={
                        "Content-Type": "text/html"
                    }
                )
            request_body = flask.request.get_data().strip()
            query = json2value(utf82unicode(request_body))

            # ENSURE THE QUERY HAS THE CORRECT FORM
            if query['from'] != 'files':
                Log.error("Can only handle queries on the `files` table")

            ands = listwrap(query.where['and'])
            if len(ands) != 3:
                Log.error(
                    'expecting a simple where clause with following structure\n{{example|json}}',
                    example={"and": [
                        {"eq": {"branch": "<BRANCH>"}},
                        {"eq": {"revision": "<REVISION>"}},
                        {"in": {"path": ["<path1>", "<path2>", "...", "<pathN>"]}}
                    ]}
                )

            # EXTRACT revision, paths AND branch FROM THE THREE CLAUSES (ANY ORDER)
            rev = None
            paths = None
            branch_name = None
            for a in ands:
                rev = coalesce(rev, a.eq.revision)
                paths = unwraplist(coalesce(paths, a['in'].path, a.eq.path))
                branch_name = coalesce(branch_name, a.eq.branch)
            paths = listwrap(paths)

            if len(paths) == 0:
                response, completed = [], True
            elif service.conn.pending_transactions > TOO_BUSY:  # CHECK IF service IS VERY BUSY
                # TODO: BE SURE TO UPDATE STATS TOO
                Log.note("Too many open transactions")
                response, completed = [], False
            elif service.get_thread_count() > TOO_MANY_THREADS:
                Log.note("Too many threads open")
                response, completed = [], False
            else:
                # RETURN TUIDS
                with Timer("tuid internal response time for {{num}} files", {"num": len(paths)}):
                    response, completed = service.get_tuids_from_files(
                        revision=rev, files=paths, going_forward=True, repo=branch_name
                    )

            if not completed:
                Log.note(
                    "Request for {{num}} files is incomplete for revision {{rev}}.",
                    num=len(paths), rev=rev
                )

            if query.meta.format == 'list':
                formatter = _stream_list
            else:
                formatter = _stream_table

            service.statsdaemon.update_requests(
                requests_complete=1 if completed else 0,
                requests_incomplete=1 if not completed else 0,
                requests_passed=1
            )

            # 202 SIGNALS THE CLIENT THAT THE ANSWER IS PARTIAL AND MAY BE RETRIED
            return Response(
                formatter(response),
                status=200 if completed else 202,
                headers={
                    "Content-Type": "application/json"
                }
            )
        except Exception as e:
            e = Except.wrap(e)
            service.statsdaemon.update_requests(requests_incomplete=1, requests_failed=1)
            Log.warning("could not handle request", cause=e)
            return Response(
                unicode2utf8(value2json(e, pretty=True)),
                status=400,
                headers={
                    "Content-Type": "text/html"
                }
            )
def __data__(self):
    """
    SERIALIZE THIS EXCEPTION'S ATTRIBUTES TO Data, RECURSING INTO THE CAUSE CHAIN
    """
    output = Data({name: getattr(self, name) for name in vars(self)})
    serialized_causes = [c.__data__() for c in listwrap(output.cause)]
    output.cause = unwraplist(serialized_causes)
    return output