Example #1
def format_list(T, select, query=None):
    data = []
    if isinstance(query.select, list):
        for row in T:
            r = Data()
            for s in select:
                v = s.pull(row)
                r[s.put.name][s.put.child] = unwraplist(v)
            data.append(r if r else None)
    elif isinstance(query.select.value, LeavesOp):
        for row in T:
            r = Data()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(s.pull(row))
            data.append(r if r else None)
    else:
        for row in T:
            r = None
            for s in select:
                v = unwraplist(s.pull(row))
                if v is None:
                    continue
                if s.put.child == ".":
                    r = v
                else:
                    if r is None:
                        r = Data()
                    r[s.put.child] = v

            data.append(r)

    return Data(
        meta={"format": "list"},
        data=data
    )
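Note: every example on this page centers on the same helper. Judging from the call sites, unwraplist appears to collapse a list to its simplest form: an empty list becomes None, a one-element list becomes that element, and longer lists pass through unchanged. A minimal sketch of that assumed behaviour (not the library's actual implementation):

def unwraplist(v):
    # Assumed semantics, reconstructed from the call sites on this page:
    # [] -> None, [x] -> x, longer lists (and non-lists) pass through.
    if isinstance(v, list):
        if len(v) == 0:
            return None
        if len(v) == 1:
            return v[0]
    return v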
Example #2
def format_list(T, select, query=None):
    data = []
    if isinstance(query.select, list):
        for row in T:
            r = Data()
            for s in select:
                v = s.pull(row)
                r[s.put.name][s.put.child] = unwraplist(v)
            data.append(r if r else None)
    elif isinstance(query.select.value, LeavesOp):
        for row in T:
            r = Data()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(s.pull(row))
            data.append(r if r else None)
    else:
        for row in T:
            r = None
            for s in select:
                v = unwraplist(s.pull(row))
                if v is None:
                    continue
                if s.put.child == ".":
                    r = v
                else:
                    if r is None:
                        r = Data()
                    r[s.put.child] = v

            data.append(r)

    return Data(meta={"format": "list"}, data=data)
Example #3
    def _normalize_revision(self, r, found_revision, push, get_diff):
        new_names = set(r.keys()) - {"rev", "node", "user", "description", "desc", "date", "files", "backedoutby", "parents", "children", "branch", "tags", "pushuser", "pushdate", "pushid", "phase", "bookmarks"}
        if new_names and not r.tags:
            Log.warning("hg is returning new property names ({{names}})", names=new_names)

        changeset = Changeset(
            id=r.node,
            id12=r.node[0:12],
            author=r.user,
            description=strings.limit(coalesce(r.description, r.desc), 2000),
            date=parse_hg_date(r.date),
            files=r.files,
            backedoutby=r.backedoutby if r.backedoutby else None,
            bug=self._extract_bug_id(r.description)
        )
        rev = Revision(
            branch=found_revision.branch,
            index=r.rev,
            changeset=changeset,
            parents=unwraplist(list(set(r.parents))),
            children=unwraplist(list(set(r.children))),
            push=push,
            phase=r.phase,
            bookmarks=unwraplist(r.bookmarks),
            etl={"timestamp": Date.now().unix, "machine": machine_metadata}
        )

        r.pushuser = None
        r.pushdate = None
        r.pushid = None
        r.node = None
        r.user = None
        r.desc = None
        r.description = None
        r.date = None
        r.files = None
        r.backedoutby = None
        r.parents = None
        r.children = None
        r.bookmarks = None

        set_default(rev, r)

        # ADD THE DIFF
        if get_diff or GET_DIFF:
            rev.changeset.diff = self._get_json_diff_from_hg(rev)

        try:
            _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
            with self.es_locker:
                self.es.add({"id": _id, "value": rev})
        except Exception as e:
            Log.warning("did not save to ES", cause=e)

        return rev
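Here unwraplist flattens the de-duplicated parent, child and bookmark lists before they are stored on the Revision. A hypothetical illustration (the ids are invented), assuming the semantics sketched under Example #1:

parents = list(set(["b3a1c0ffee99"]))   # hg reported a single parent
unwraplist(parents)                     # -> "b3a1c0ffee99"  (stored as a scalar)
unwraplist([])                          # -> None            (e.g. no bookmarks)
unwraplist(["tip", "beta"])             # -> ["tip", "beta"] (kept as a list)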
Example #4
    def stop(self):
        """
        BLOCKS UNTIL ALL THREADS HAVE STOPPED
        """
        join_errors = []

        children = copy(self.children)
        for c in reversed(children):
            if DEBUG and c.name:
                Log.note("Stopping thread {{name|quote}}", name=c.name)
            try:
                c.stop()
            except Exception as e:
                join_errors.append(e)

        for c in children:
            if DEBUG and c.name:
                Log.note("Joining on thread {{name|quote}}", name=c.name)
            try:
                c.join()
            except Exception as e:
                join_errors.append(e)

            if DEBUG and c.name:
                Log.note("Done join on thread {{name|quote}}", name=c.name)

        if join_errors:
            Log.error("Problem while stopping {{name|quote}}", name=self.name, cause=unwraplist(join_errors))

        self.timers.stop()
        self.timers.join()

        if DEBUG:
            Log.note("Thread {{name|quote}} now stopped", name=self.name)
Example #5
def format_table(T, select, query=None):
    data = []
    num_columns = (MAX(select.put.index) + 1)
    for row in T:
        r = [None] * num_columns
        for s in select:
            value = unwraplist(row[s.pull])

            if value == None:
                continue

            index, child = s.put.index, s.put.child
            if child == ".":
                r[index] = value
            else:
                if r[index] is None:
                    r[index] = Data()
                r[index][child] = value

        data.append(r)

    header = [None] * num_columns
    for s in select:
        if header[s.put.index]:
            continue
        header[s.put.index] = s.name.replace("\\.", ".")

    return Data(
        meta={"format": "table"},
        header=header,
        data=data
    )
Example #6
    def wrap(cls, e, stack_depth=0):
        """
        ENSURE THE STACKTRACE AND CAUSAL CHAIN IS CAPTURED, PLUS ADD FEATURES OF Except

        :param e: AN EXCEPTION OF ANY TYPE
        :param stack_depth: HOW MANY CALLS TO TAKE OFF THE TOP OF THE STACK TRACE
        :return: An Except OBJECT OF THE SAME
        """
        if e == None:
            return Null
        elif isinstance(e, (list, Except)):
            return e
        elif is_data(e):
            e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
            return Except(**e)
        else:
            tb = getattr(e, '__traceback__', None)
            if tb is not None:
                trace = _parse_traceback(tb)
            else:
                trace = _extract_traceback(0)

            cause = Except.wrap(getattr(e, '__cause__', None))
            if hasattr(e, "message") and e.message:
                output = Except(context=ERROR, template=text_type(e.message), trace=trace, cause=cause)
            else:
                output = Except(context=ERROR, template=text_type(e), trace=trace, cause=cause)

            trace = extract_stack(stack_depth + 2)  # +2 to remove the caller and its call to Except.wrap()
            output.trace.extend(trace)
            return output
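The line e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)]) treats listwrap and unwraplist as rough inverses: listwrap normalizes None, a scalar, or a list into a list so the comprehension always iterates, and unwraplist collapses the result back afterwards. A sketch of the assumed listwrap counterpart:

def listwrap(v):
    # Assumed counterpart to unwraplist: always hand back a list.
    if v is None:
        return []
    if isinstance(v, list):
        return v
    return [v]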
Example #7
def list2cube(rows, column_names=None):
    if column_names:
        keys = column_names
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    data = {k: [] for k in keys}
    output = wrap({
        "meta": {
            "format": "cube"
        },
        "edges": [{
            "name": "rownum",
            "domain": {
                "type": "rownum",
                "min": 0,
                "max": len(rows),
                "interval": 1
            }
        }],
        "data":
        data
    })

    for r in rows:
        for k in keys:
            data[k].append(unwraplist(r[k]))

    return output
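A hypothetical call to list2cube (the rows are invented) shows why unwraplist is applied per cell: one-element lists come back as scalars and empty lists as None:

rows = [{"a": 1, "b": ["x"]}, {"a": 2, "b": []}]
cube = list2cube(rows, column_names=["a", "b"])
# Expected: cube.data holds {"a": [1, 2], "b": ["x", None]}
#           and cube.edges[0].domain.max == 2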
Example #8
    def wrap(cls, e, stack_depth=0):
        """
        ENSURE THE STACKTRACE AND CAUSAL CHAIN IS CAPTURED, PLUS ADD FEATURES OF Except

        :param e: AN EXCEPTION OF ANY TYPE
        :param stack_depth: HOW MANY CALLS TO TAKE OFF THE TOP OF THE STACK TRACE
        :return: An Except OBJECT OF THE SAME
        """
        if e == None:
            return Null
        elif isinstance(e, (list, Except)):
            return e
        elif isinstance(e, Mapping):
            e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
            return Except(**e)
        else:
            tb = getattr(e, '__traceback__', None)
            if tb is not None:
                trace = _parse_traceback(tb)
            else:
                trace = _extract_traceback(0)

            cause = Except.wrap(getattr(e, '__cause__', None))
            if hasattr(e, "message") and e.message:
                output = Except(type=ERROR, template=text_type(e.message), trace=trace, cause=cause)
            else:
                output = Except(type=ERROR, template=text_type(e), trace=trace, cause=cause)

            trace = extract_stack(stack_depth + 2)  # +2 to remove the caller and its call to Except.wrap()
            output.trace.extend(trace)
            return output
Example #9
def _normalize_edge(edge, schema=None):
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, basestring):
        if schema:
            try:
                e = schema[edge]
            except Exception, _:
                e = None
            e = unwrap(unwraplist(e))
            if e and not isinstance(e, (_Column, set, list)):
                if isinstance(e, _Column):
                    return Data(name=edge,
                                value=jx_expression(edge),
                                allowNulls=True,
                                domain=_normalize_domain(domain=e,
                                                         schema=schema))
                elif isinstance(e.fields, list) and len(e.fields) == 1:
                    return Data(name=e.name,
                                value=jx_expression(e.fields[0]),
                                allowNulls=True,
                                domain=e.getDomain())
                else:
                    return Data(name=e.name,
                                allowNulls=True,
                                domain=e.getDomain())
        return Data(name=edge,
                    value=jx_expression(edge),
                    allowNulls=True,
                    domain=_normalize_domain(schema=schema))
Example #10
def format_table(T, select, query=None):
    data = []
    num_columns = (MAX(select.put.index) + 1)
    for row in T:
        r = [None] * num_columns
        for s in select:
            value = unwraplist(row[s.pull])

            if value == None:
                continue

            index, child = s.put.index, s.put.child
            if child == ".":
                r[index] = value
            else:
                if r[index] is None:
                    r[index] = Data()
                r[index][child] = value

        data.append(r)

    header = [None] * num_columns
    for s in select:
        if header[s.put.index]:
            continue
        header[s.put.index] = s.name.replace("\\.", ".")

    return Data(meta={"format": "table"}, header=header, data=data)
Example #11
def list2cube(rows, column_names=None):
    if column_names:
        keys = column_names
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    data = {k: [] for k in keys}
    output = wrap({
        "meta": {"format": "cube"},
        "edges": [
            {
                "name": "rownum",
                "domain": {"type": "rownum", "min": 0, "max": len(rows), "interval": 1}
            }
        ],
        "data": data
    })

    for r in rows:
        for k in keys:
            data[k].append(unwraplist(r[k]))

    return output
Example #12
def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
    """
    schema = query.frum.schema
    output = FlatList()

    if query.edges:
        if query.sort and query.format != "cube":
            # REORDER EDGES/GROUPBY TO MATCH THE SORT
            query.edges = sort_edges(query, "edges")
    elif query.groupby:
        if query.sort and query.format != "cube":
            query.groupby = sort_edges(query, "groupby")

    for edge in wrap(coalesce(query.edges, query.groupby, [])):
        limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
        if edge.value != None and not isinstance(edge.value, NullOp):
            edge = edge.copy()
            vars_ = edge.value.vars()
            for v in vars_:
                if not schema.leaves(v, meta=True):
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.range:
            vars_ = edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ = edge.domain.dimension.fields
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(edge.domain.partitions.where):
            vars_ = set()
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()

        try:
            vars_ |= edge.value.vars()
            depths = set(
                len(c.nested_path) - 1 for v in vars_
                for c in schema.leaves(v))
            if -1 in depths:
                Log.error("Do not know of column {{column}}",
                          column=unwraplist(
                              [v for v in vars_ if schema[v] == None]))
            if len(depths) > 1:
                Log.error(
                    "expression {{expr|quote}} spans tables, can not handle",
                    expr=edge.value)
            max_depth = MAX(depths)
            while len(output) <= max_depth:
                output.append([])
        except Exception as e:
            # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
            max_depth = 0
            output.append([])

        output[max_depth].append(AggsDecoder(edge, query, limit))
    return output
Example #13
    def __init__(self,
                 context=ERROR,
                 template=Null,
                 params=Null,
                 cause=Null,
                 trace=Null,
                 **_):
        if context == None:
            raise ValueError("expecting context to not be None")

        if is_many(cause):
            self.cause = unwraplist([Except.wrap(c) for c in cause])
        else:
            self.cause = Except.wrap(cause)

        Exception.__init__(self)
        LogItem.__init__(self,
                         context=context,
                         format=None,
                         template=template,
                         params=params)

        if not trace:
            self.trace = get_stacktrace(2)
        else:
            self.trace = trace
Example #14
    def warning(cls,
                template,
                default_params={},
                cause=None,
                stack_depth=0,
                log_context=None,
                **more_params):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        if "values" in more_params.keys():
            Log.error("Can not handle a logging parameter by name `values`")
        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.WARNING, template, params, cause, trace)
        Log.note("{{error|unicode}}",
                 error=e,
                 log_context=set_default({"context": exceptions.WARNING},
                                         log_context),
                 stack_depth=stack_depth + 1)
Example #15
def _get_schema_from_list(frum, table_name, prefix_path, nested_path, columns):
    """
    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param prefix_path: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return:
    """

    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            full_name = join_field(prefix_path)
            column = columns[full_name]
            if not column:
                column = Column(names={table_name: full_name},
                                es_column=full_name,
                                es_index=".",
                                type="undefined",
                                nested_path=nested_path)
                columns.add(column)
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix_path + [name])
                column = columns[full_name]
                if not column:
                    column = Column(names={table_name: full_name},
                                    es_column=full_name,
                                    es_index=".",
                                    type="undefined",
                                    nested_path=nested_path)
                    columns.add(column)
                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    _get_schema_from_list([value], table_name,
                                          prefix_path + [name], nested_path,
                                          columns)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist(
                        [join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, table_name,
                                          prefix_path + [name], newpath,
                                          columns)
Example #16
 def __data__(self):
     return wrap({
         "meta": {
             "format": "list"
         },
         "data": [{k: unwraplist(v)
                   for k, v in row.items()} for row in self.data]
     })
Example #17
def _get_schema_from_list(frum, table_name, prefix_path, nested_path, columns):
    """
    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param prefix_path: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return:
    """

    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            full_name = join_field(prefix_path)
            column = columns[full_name]
            if not column:
                column = Column(
                    names={table_name: full_name},
                    es_column=full_name,
                    es_index=".",
                    type="undefined",
                    nested_path=nested_path
                )
                columns.add(column)
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix_path + [name])
                column = columns[full_name]
                if not column:
                    column = Column(
                        names={table_name: full_name},
                        es_column=full_name,
                        es_index=".",
                        type="undefined",
                        nested_path=nested_path
                    )
                    columns.add(column)
                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    _get_schema_from_list([value], table_name, prefix_path + [name], nested_path, columns)
                elif this_type == "nested":
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0])+[name])]+np)
                    _get_schema_from_list(value, table_name, prefix_path + [name], newpath, columns)
Example #18
def get_decoders_by_depth(query):
    """
    RETURN A LIST OF DECODER ARRAYS, ONE ARRAY FOR EACH NESTED DEPTH
    """
    schema = query.frum.schema
    output = FlatList()

    if query.edges:
        if query.sort and query.format != "cube":
            # REORDER EDGES/GROUPBY TO MATCH THE SORT
            query.edges = sort_edges(query, "edges")
    elif query.groupby:
        if query.sort and query.format != "cube":
            query.groupby = sort_edges(query, "groupby")

    for edge in wrap(coalesce(query.edges, query.groupby, [])):
        limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
        if edge.value != None and not isinstance(edge.value, NullOp):
            edge = edge.copy()
            vars_ = edge.value.vars()
            for v in vars_:
                if not schema.leaves(v.var):
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.range:
            vars_ = edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v.var]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ = edge.domain.dimension.fields
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(edge.domain.partitions.where):
            vars_ = set()
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()

        try:
            vars_ |= edge.value.vars()
            depths = set(len(c.nested_path) - 1 for v in vars_ for c in schema.leaves(v.var))
            if -1 in depths:
                Log.error(
                    "Do not know of column {{column}}",
                    column=unwraplist([v for v in vars_ if schema[v] == None])
                )
            if len(depths) > 1:
                Log.error("expression {{expr|quote}} spans tables, can not handle", expr=edge.value)
            max_depth = MAX(depths)
            while len(output) <= max_depth:
                output.append([])
        except Exception as e:
            # USUALLY THE SCHEMA IS EMPTY, SO WE ASSUME THIS IS A SIMPLE QUERY
            max_depth = 0
            output.append([])

        output[max_depth].append(AggsDecoder(edge, query, limit))
    return output
Example #19
def get_decoders_by_path(query, schema):
    """
    RETURN MAP FROM QUERY PATH TO LIST OF DECODER ARRAYS

    :param query:
    :return:
    """
    output = {}

    if query.edges:
        if query.sort and query.format != "cube":
            # REORDER EDGES/GROUPBY TO MATCH THE SORT
            query.edges = sort_edges(query, "edges")
    elif query.groupby:
        if query.sort and query.format != "cube":
            query.groupby = sort_edges(query, "groupby")

    for edge in to_data(coalesce(query.edges, query.groupby, [])):
        limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
        vars_ = coalesce(edge.value.vars(), set())

        if edge.range:
            vars_ |= edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v.var]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ |= set(Variable(v) for v in edge.domain.dimension.fields)
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [
                schema[v.var].es_column for v in vars_
            ]
        elif edge.domain.partitions.where and all(
                edge.domain.partitions.where):
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()
        else:
            # SIMPLE edge.value
            decoder = AggsDecoder(edge, query, limit)
            depths = set(c.nested_path[0] for v in vars_
                         for c in schema.leaves(v.var))
            output.setdefault(first(depths), []).append(decoder)
            continue

        depths = set(c.nested_path[0] for v in vars_
                     for c in schema.leaves(v.var))
        if not depths:
            Log.error("Do not know of column {{column}}",
                      column=unwraplist(
                          [v for v in vars_ if schema[v.var] == None]))
        if len(depths) > 1:
            Log.error("expression {{expr|quote}} spans tables, can not handle",
                      expr=edge.value)

        decoder = AggsDecoder(edge, query, limit)
        output.setdefault(first(depths), []).append(decoder)
    return output
Example #20
                def pull_nested_field(doc):
                    hits = doc.get(pos, Null).inner_hits[name].hits.hits
                    if not hits:
                        return []

                    temp = [(index(h), value(h)) for h in hits]
                    acc = [None] * len(temp)
                    for i, v in temp:
                        acc[i] = unwraplist(v)
                    return acc
Example #21
 def format_object(doc):
     r = Data()
     for s in select:
         v = unwraplist(s.pull(doc))
         if v is not None:
             try:
                 r[s.put.name][s.put.child] = v
             except Exception as e:
                 Log.error("what's happening here?", cause=e)
     return r if r else None
Example #22
 def __data__(self):
     if first(self.schema.columns).name=='.':
         return wrap({
             "meta": {"format": "list"},
             "data": self.data
         })
     else:
         return wrap({
             "meta": {"format": "list"},
             "data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data]
         })
Example #23
def get_decoders_by_path(query):
    """
    RETURN MAP FROM QUERY PATH TO LIST OF DECODER ARRAYS

    :param query:
    :return:
    """
    schema = query.frum.schema
    output = Data()

    if query.edges:
        if query.sort and query.format != "cube":
            # REORDER EDGES/GROUPBY TO MATCH THE SORT
            query.edges = sort_edges(query, "edges")
    elif query.groupby:
        if query.sort and query.format != "cube":
            query.groupby = sort_edges(query, "groupby")

    for edge in wrap(coalesce(query.edges, query.groupby, [])):
        limit = coalesce(edge.domain.limit, query.limit, DEFAULT_LIMIT)
        if edge.value != None and not edge.value is NULL:
            edge = edge.copy()
            vars_ = edge.value.vars()
            for v in vars_:
                if not schema.leaves(v.var):
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.range:
            vars_ = edge.range.min.vars() | edge.range.max.vars()
            for v in vars_:
                if not schema[v.var]:
                    Log.error("{{var}} does not exist in schema", var=v)
        elif edge.domain.dimension:
            vars_ = edge.domain.dimension.fields
            edge.domain.dimension = edge.domain.dimension.copy()
            edge.domain.dimension.fields = [schema[v].es_column for v in vars_]
        elif all(edge.domain.partitions.where):
            vars_ = set()
            for p in edge.domain.partitions:
                vars_ |= p.where.vars()

        vars_ |= edge.value.vars()
        depths = set(c.nested_path[0] for v in vars_
                     for c in schema.leaves(v.var))
        if not depths:
            Log.error("Do not know of column {{column}}",
                      column=unwraplist(
                          [v for v in vars_ if schema[v] == None]))
        if len(depths) > 1:
            Log.error("expression {{expr|quote}} spans tables, can not handle",
                      expr=edge.value)

        decoder = AggsDecoder(edge, query, limit)
        output[literal_field(first(depths))] += [decoder]
    return output
Example #24
 def __exit__(self, exc_type, exc_val, exc_tb):
     causes = []
     try:
         if isinstance(exc_val, Exception):
             causes.append(Except.wrap(exc_val))
             self.rollback()
         else:
             self.commit()
     except Exception as e:
         causes.append(Except.wrap(e))
         Log.error("Transaction failed", cause=unwraplist(causes))
Example #25
 def __data__(self):
     if first(self.schema.columns).name=='.':
         return wrap({
             "meta": {"format": "list"},
             "data": self.data
         })
     else:
         return wrap({
             "meta": {"format": "list"},
             "data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data]
         })
Example #26
    def stop(self):
        """
        BLOCKS UNTIL ALL KNOWN THREADS, EXCEPT MainThread, HAVE STOPPED
        """
        global DEBUG

        self_thread = Thread.current()
        if self_thread != MAIN_THREAD or self_thread != self:
            Log.error("Only the main thread can call stop() on main thread")

        DEBUG = True
        self.please_stop.go()

        join_errors = []
        with self.child_locker:
            children = copy(self.children)
        for c in reversed(children):
            DEBUG and c.name and Log.note("Stopping thread {{name|quote}}", name=c.name)
            try:
                c.stop()
            except Exception as e:
                join_errors.append(e)

        for c in children:
            DEBUG and c.name and Log.note(
                "Joining on thread {{name|quote}}", name=c.name
            )
            try:
                c.join()
            except Exception as e:
                join_errors.append(e)

            DEBUG and c.name and Log.note(
                "Done join on thread {{name|quote}}", name=c.name
            )

        if join_errors:
            Log.error(
                "Problem while stopping {{name|quote}}",
                name=self.name,
                cause=unwraplist(join_errors),
            )

        with self.shutdown_locker:
            if self.stopped:
                return
            self.stop_logging()
            self.timers.stop()
            self.timers.join()

            write_profiles(self.cprofiler)
            DEBUG and Log.note("Thread {{name|quote}} now stopped", name=self.name)
            self.stopped.go()
Example #27
def list2table(rows, column_names=None):
    if column_names:
        keys = list(set(column_names))
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    output = [[unwraplist(r.get(k)) for k in keys] for r in rows]

    return wrap({"meta": {"format": "table"}, "header": keys, "data": output})
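A hypothetical list2table call on similar invented rows; each row becomes a positional list aligned with the header. Note that keys = list(set(column_names)) does not guarantee column order, so the expectation below assumes the order comes back as given:

rows = [{"a": 1, "b": ["x", "y"]}, {"a": 2, "b": []}]
t = list2table(rows, column_names=["a", "b"])
# Expected (given that column order): t.header == ["a", "b"]
#                                     t.data == [[1, ["x", "y"]], [2, None]]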
Example #28
def format_list(T, select, query=None):
    data = []
    if isinstance(query.select, list):
        for row in T:
            r = Data()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    elif isinstance(query.select.value, LeavesOp):
        for row in T:
            r = Data()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)
    else:
        for row in T:
            r = Data()
            for s in select:
                r[s.put.child] = unwraplist(row[s.pull])
            data.append(r if r else None)

    return Data(meta={"format": "list"}, data=data)
Example #29
 def output(doc):
     acc = []
     for h in doc.inner_hits[name].hits.hits:
         i = h._nested.offset
         obj = Data()
         for f, v in h.fields.items():
             local_path = untype_path(relative_field(f, nested_path))
             obj[local_path] = unwraplist(v)
         # EXTEND THE LIST TO THE LENGTH WE REQUIRE
         for _ in range(len(acc), i + 1):
             acc.append(None)
         acc[i] = expr(obj)
     return acc
Example #30
def scrub_args(args):
    output = {}
    for k, v in list(args.items()):
        vs = []
        for v in listwrap(v):
            if is_integer(v):
                vs.append(int(v))
            elif is_number(v):
                vs.append(float(v))
            else:
                vs.append(v)
        output[k] = unwraplist(vs)
    return wrap(output)
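A hypothetical scrub_args call, assuming the arguments arrive as lists of strings (as query-string parsers usually deliver them) and that is_integer and is_number accept numeric strings:

scrub_args({"limit": ["20"], "score": ["0.9"], "tags": ["a", "b"]})
# Expected result (wrapped): {"limit": 20, "score": 0.9, "tags": ["a", "b"]}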
Example #31
def format_list(T, select, query=None):
    data = []
    if is_list(query.select):
        for row in T:
            r = Data()
            for s in select:
                v = unwraplist(s.pull(row))
                if v is not None:
                    try:
                        r[s.put.name][s.put.child] = v
                    except Exception as e:
                        Log.error("what's happening here?")
            data.append(r if r else None)
    elif is_op(query.select.value, LeavesOp):
        for row in T:
            r = Data()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(s.pull(row))
            data.append(r if r else None)
    else:
        for row in T:
            r = None
            for s in select:
                v = unwraplist(s.pull(row))
                if v is None:
                    continue
                if s.put.child == ".":
                    r = v
                else:
                    if r is None:
                        r = Data()
                    r[s.put.child] = v

            data.append(r)

    return Data(
        meta={"format": "list"},
        data=data
    )
Example #32
 def output(doc):
     acc = []
     for h in doc.inner_hits[name].hits.hits:
         i = h._nested.offset
         obj = Data()
         for f, v in h.fields.items():
             local_path = untype_path(relative_field(f, nested_path))
             obj[local_path] = unwraplist(v)
         # EXTEND THE LIST TO THE LENGTH WE REQUIRE
         for _ in range(len(acc), i+1):
             acc.append(None)
         acc[i] = expr(obj)
     return acc
Example #33
def format_list(T, select, query=None):
    data = []
    if is_list(query.select):
        for row in T:
            r = Data()
            for s in select:
                v = unwraplist(s.pull(row))
                if v is not None:
                    try:
                        r[s.put.name][s.put.child] = v
                    except Exception as e:
                        Log.error("what's happening here?")
            data.append(r if r else None)
    elif is_op(query.select.value, LeavesOp):
        for row in T:
            r = Data()
            for s in select:
                r[s.put.name][s.put.child] = unwraplist(s.pull(row))
            data.append(r if r else None)
    else:
        for row in T:
            r = None
            for s in select:
                v = unwraplist(s.pull(row))
                if v is None:
                    continue
                if s.put.child == ".":
                    r = v
                else:
                    if r is None:
                        r = Data()
                    r[s.put.child] = v

            data.append(r)

    return Data(
        meta={"format": "list"},
        data=data
    )
Example #34
def parse_sql(sql):
    # TODO: CONVERT tuple OF LITERALS INTO LITERAL LIST
    # # IF ALL MEMBERS OF A LIST ARE LITERALS, THEN MAKE THE LIST LITERAL
    # if all(isinstance(r, number_types) for r in output):
    #     pass
    # elif all(isinstance(r, number_types) or (is_data(r) and "literal" in r.keys()) for r in output):
    #     output = {"literal": [r['literal'] if is_data(r) else r for r in output]}
    query = wrap(moz_sql_parser.parse(sql))
    redundant_select = []
    # PULL OUT THE AGGREGATES
    for s in listwrap(query.select):
        val = s if s == '*' else s.value

        # EXTRACT KNOWN AGGREGATE FUNCTIONS
        if is_data(val):
            for a in KNOWN_SQL_AGGREGATES:
                value = val[a]
                if value != None:
                    if is_list(value):
                        # AGGREGATE WITH PARAMETERS  EG percentile(value, 0.90)
                        s.aggregate = a
                        s[a] = unwraplist(value[1::])
                        s.value = value[0]
                    else:
                        # SIMPLE AGGREGATE
                        s.aggregate = a
                        s.value = value
                    break

        # LOOK FOR GROUPBY COLUMN IN SELECT CLAUSE, REMOVE DUPLICATION
        for g in listwrap(query.groupby):
            try:
                assertAlmostEqual(g.value, val, "")
                g.name = s.name
                redundant_select.append(s)
                break
            except Exception:
                pass

    # REMOVE THE REDUNDANT select
    if is_list(query.select):
        for r in redundant_select:
            query.select.remove(r)
    elif query.select and redundant_select:
        query.select = None

    # RENAME orderby TO sort
    query.sort, query.orderby = query.orderby, None
    query.format = "table"
    return query
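For an aggregate with parameters, the parser is assumed to hand the arguments back as a list, so value[1::] is the parameter tail and unwraplist collapses it when there is exactly one. A hypothetical walk-through for percentile(score, 0.90):

# moz_sql_parser is assumed to return something like:
val = {"percentile": ["score", 0.90]}
value = val["percentile"]                      # ["score", 0.90]
# inside parse_sql this yields:
#   s.aggregate = "percentile"
#   s["percentile"] = unwraplist(value[1::])   # -> 0.90
#   s.value = value[0]                         # -> "score"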
Example #35
        def format_value(doc):
            r = None
            for s in select:
                v = unwraplist(s.pull(doc))
                if v is None:
                    continue
                if s.put.child == ".":
                    r = v
                else:
                    if r is None:
                        r = Data()
                    r[s.put.child] = v

            return r
Example #36
    def _convert_edge(self, edge):
        dim = self.dimensions[edge.value]
        if not dim:
            return edge

        if len(listwrap(dim.fields)) == 1:
            #TODO: CHECK IF EDGE DOMAIN AND DIMENSION DOMAIN CONFLICT
            new_edge = set_default({"value": unwraplist(dim.fields)}, edge)
            return new_edge
            new_edge.domain = dim.getDomain()

        edge = copy(edge)
        edge.value = None
        edge.domain = dim.getDomain()
        return edge
Example #37
    def _convert_edge(self, edge):
        dim = self.dimensions[edge.value]
        if not dim:
            return edge

        if len(listwrap(dim.fields)) == 1:
            #TODO: CHECK IF EDGE DOMAIN AND DIMENSION DOMAIN CONFLICT
            new_edge = set_default({"value": unwraplist(dim.fields)}, edge)
            return new_edge
            new_edge.domain = dim.getDomain()

        edge = copy(edge)
        edge.value = None
        edge.domain = dim.getDomain()
        return edge
Example #38
def parse_sql(sql):
    query = wrap(moz_sql_parser.parse(sql))
    query.select = listwrap(query.select)

    redundant_select = []
    # PULL OUT THE AGGREGATES
    for s in query.select:
        val = s if s == '*' else s.value

        # EXTRACT KNOWN AGGREGATE FUNCTIONS
        if isinstance(val, Mapping):
            for a in KNOWN_SQL_AGGREGATES:
                value = val[a]
                if value != None:
                    s.aggregate = a
                    if isinstance(value, list):
                        # AGGREGATE WITH PARAMETERS  EG percentile(value, 0.90)
                        s[a] = unwraplist(value[1::])
                        s.value = value[0]
                    elif isinstance(value, Mapping):
                        # EXPRESSION
                        if len(value.keys()) == 0:
                            s.value = None
                        else:
                            s.value = value
                    else:
                        # SIMPLE VALUE
                        s.value = value
                    break

        # LOOK FOR GROUPBY COLUMN IN SELECT CLAUSE, REMOVE DUPLICATION
        for g in listwrap(query.groupby):
            try:
                assertAlmostEqual(g.value, val, "")
                g.name = s.name
                redundant_select.append(s)
                break
            except Exception:
                pass

    # REMOVE THE REDUNDANT select
    for r in redundant_select:
        query.select.remove(r)

    # RENAME orderby TO sort
    query.sort, query.orderby = query.orderby, None
    query.format = "table"
    return query
Example #39
    def format_row(doc):
        row = [None] * num_columns
        for s in select:
            value = unwraplist(s.pull(doc))

            if value == None:
                continue

            index, child = s.put.index, s.put.child
            if child == ".":
                row[index] = value
            else:
                if row[index] is None:
                    row[index] = Data()
                row[index][child] = value
        return row
Example #40
def list2table(rows, column_names=None):
    if column_names:
        keys = list(set(column_names))
    else:
        columns = set()
        for r in rows:
            columns |= set(r.keys())
        keys = list(columns)

    output = [[unwraplist(r.get(k)) for k in keys] for r in rows]

    return wrap({
        "meta": {"format": "table"},
        "header": keys,
        "data": output
    })
Example #41
    def wrap(cls, e, stack_depth=0):
        if e == None:
            return Null
        elif isinstance(e, (list, Except)):
            return e
        elif isinstance(e, Mapping):
            e.cause = unwraplist([Except.wrap(c) for c in listwrap(e.cause)])
            return Except(**e)
        else:
            if hasattr(e, "message") and e.message:
                cause = Except(ERROR, unicode(e.message), trace=_extract_traceback(0))
            else:
                cause = Except(ERROR, unicode(e), trace=_extract_traceback(0))

            trace = extract_stack(stack_depth + 2)  # +2 to remove the caller and its call to Except.wrap()
            cause.trace.extend(trace)
            return cause
Example #42
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self._es.get_schema()

        # GET IDS OF DOCUMENTS
        results = self._es.search({
            "fields": listwrap(schema._routing.path),
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": jx_expression(command.where).to_esfilter()
            }},
            "size": 200000
        })

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = FlatList()
        for k, v in command.set.items():
            if not is_variable_name(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()})

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append({"update": {"_id": h._id, "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])}})
                    updates.append(s)
            content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode('utf-8')
            response = self._es.cluster.post(
                self._es.path + "/_bulk",
                data=content,
                headers={"Content-Type": "application/json"},
                timeout=self.settings.timeout,
                params={"consistency": self.settings.consistency}
            )
            if response.errors:
                Log.error("could not update: {{error}}", error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)])
Example #43
    def fatal(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        SEND TO STDERR

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.ERROR, template, params, cause, trace)
        str_e = unicode(e)

        error_mode = cls.error_mode
        with suppress_exception:
            if not error_mode:
                cls.error_mode = True
                Log.note(
                    "{{error|unicode}}",
                    error=e,
                    log_context=set_default({"context": exceptions.FATAL}, log_context),
                    stack_depth=stack_depth + 1
                )
        cls.error_mode = error_mode

        sys.stderr.write(str_e.encode('utf8'))
Example #44
    def stop(self):
        """
        BLOCKS UNTIL ALL THREADS HAVE STOPPED
        THEN RUNS sys.exit(0)
        """
        global DEBUG

        self_thread = Thread.current()
        if self_thread != MAIN_THREAD or self_thread != self:
            Log.error("Only the main thread can call stop() on main thread")

        DEBUG = True
        self.please_stop.go()

        join_errors = []
        with self.child_lock:
            children = copy(self.children)
        for c in reversed(children):
            DEBUG and c.name and Log.note("Stopping thread {{name|quote}}", name=c.name)
            try:
                c.stop()
            except Exception as e:
                join_errors.append(e)

        for c in children:
            DEBUG and c.name and Log.note("Joining on thread {{name|quote}}", name=c.name)
            try:
                c.join()
            except Exception as e:
                join_errors.append(e)

            DEBUG and c.name and Log.note("Done join on thread {{name|quote}}", name=c.name)

        if join_errors:
            Log.error("Problem while stopping {{name|quote}}", name=self.name, cause=unwraplist(join_errors))

        self.stop_logging()
        self.timers.stop()
        self.timers.join()

        write_profiles(self.cprofiler)
        DEBUG and Log.note("Thread {{name|quote}} now stopped", name=self.name)
        sys.exit(0)
Example #45
        def map_edge(e, map_):
            partitions = unwraplist([
                set_default(
                    {"where": p.where.map(map_)},
                    p
                )
                for p in e.domain.partitions
            ])

            domain = copy(e.domain)
            domain.where = e.domain.where.map(map_)
            domain.partitions = partitions

            edge = copy(e)
            edge.value = e.value.map(map_)
            edge.domain = domain
            if e.range:
                edge.range.min = e.range.min.map(map_)
                edge.range.max = e.range.max.map(map_)
            return edge
Example #46
def format_table(T, select, query=None):
    data = []
    num_columns = (MAX(select.put.index) + 1)
    for row in T:
        r = [None] * num_columns
        for s in select:
            value = unwraplist(s.pull(row))

            if value == None:
                continue

            index, child = s.put.index, s.put.child
            if child == ".":
                r[index] = value
            else:
                if r[index] is None:
                    r[index] = Data()
                r[index][child] = value

        data.append(r)

    header = [None] * num_columns

    if is_data(query.select) and not is_op(query.select.value, LeavesOp):
        for s in select:
            header[s.put.index] = s.name
    else:
        for s in select:
            if header[s.put.index]:
                continue
            if s.name == ".":
                header[s.put.index] = "."
            else:
                header[s.put.index] = s.name

    return Data(
        meta={"format": "table"},
        header=header,
        data=data
    )
Example #47
    def warning(
        cls,
        template,
        default_params={},
        cause=None,
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        timestamp = datetime.utcnow()
        if not is_text(template):
            Log.error("Log.warning was expecting a unicode template")

        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        if "values" in more_params.keys():
            Log.error("Can not handle a logging parameter by name `values`")

        params = Data(dict(default_params, **more_params))
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.WARNING, template=template, params=params, cause=cause, trace=trace)
        Log._annotate(
            e,
            timestamp,
            stack_depth+1
        )
Example #48
    def error(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # plausible cause
        stack_depth=0,
        **more_params
    ):
        """
        raise an exception with a trace for the cause too

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if not isinstance(template, unicode):
            sys.stderr.write("Log.error was expecting a unicode template")
            Log.error("Log.error was expecting a unicode template")

        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        add_to_trace = False
        cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
        trace = exceptions.extract_stack(stack_depth + 1)

        if add_to_trace:
            cause[0].trace.extend(trace[1:])

        e = Except(exceptions.ERROR, template, params, cause, trace)
        raise e
Example #49
 def __data__(self):
     return wrap({
         "meta": {"format": "list"},
         "data": [{k: unwraplist(v) for k, v in row.items()} for row in self.data]
     })
Example #50
    def _normalize_revision(self, r, found_revision, push, get_diff, get_moves):
        new_names = set(r.keys()) - KNOWN_TAGS
        if new_names and not r.tags:
            Log.warning(
                "hg is returning new property names {{names|quote}} for {{changeset}} from {{url}}",
                names=new_names,
                changeset=r.node,
                url=found_revision.branch.url
            )

        changeset = Changeset(
            id=r.node,
            id12=r.node[0:12],
            author=r.user,
            description=strings.limit(coalesce(r.description, r.desc), 2000),
            date=parse_hg_date(r.date),
            files=r.files,
            backedoutby=r.backedoutby if r.backedoutby else None,
            bug=self._extract_bug_id(r.description)
        )
        rev = Revision(
            branch=found_revision.branch,
            index=r.rev,
            changeset=changeset,
            parents=unwraplist(list(set(r.parents))),
            children=unwraplist(list(set(r.children))),
            push=push,
            phase=r.phase,
            bookmarks=unwraplist(r.bookmarks),
            landingsystem=r.landingsystem,
            etl={"timestamp": Date.now().unix, "machine": machine_metadata}
        )

        r.pushuser = None
        r.pushdate = None
        r.pushid = None
        r.node = None
        r.user = None
        r.desc = None
        r.description = None
        r.date = None
        r.files = None
        r.backedoutby = None
        r.parents = None
        r.children = None
        r.bookmarks = None
        r.landingsystem = None

        set_default(rev, r)

        # ADD THE DIFF
        if get_diff:
            rev.changeset.diff = self._get_json_diff_from_hg(rev)
        if get_moves:
            rev.changeset.moves = self._get_moves_from_hg(rev)

        try:
            _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name + "-" + coalesce(rev.branch.locale, DEFAULT_LOCALE)
            with self.es_locker:
                self.es.add({"id": _id, "value": rev})
        except Exception as e:
            e = Except.wrap(e)
            Log.warning("Did not save to ES, waiting {{duration}} seconds", duration=WAIT_AFTER_NODE_FAILURE, cause=e)
            Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
            if "FORBIDDEN/12/index read-only" in e:
                pass  # KNOWN FAILURE MODE

        return rev
Example #51
def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
    """
    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param parent: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return:
    """

    for d in frum:
        row_type = python_type_to_json_type[d.__class__]

        if row_type != "object":
            # EXPECTING PRIMITIVE VALUE
            full_name = parent
            column = columns[full_name]
            if not column:
                column = Column(
                    name=concat_field(table_name, full_name),
                    es_column=full_name,
                    es_index=".",
                    es_type=d.__class__.__name__,
                    jx_type=None,  # WILL BE SET BELOW
                    last_updated=Date.now(),
                    nested_path=nested_path,
                )
                columns.add(column)
            column.es_type = _merge_python_type(column.es_type, d.__class__)
            column.jx_type = python_type_to_json_type[column.es_type]
        else:
            for name, value in d.items():
                full_name = concat_field(parent, name)
                column = columns[full_name]
                if not column:
                    column = Column(
                        name=concat_field(table_name, full_name),
                        es_column=full_name,
                        es_index=".",
                        es_type=value.__class__.__name__,
                        jx_type=None,  # WILL BE SET BELOW
                        last_updated=Date.now(),
                        nested_path=nested_path,
                    )
                    columns.add(column)
                if is_container(value):  # GET TYPE OF MULTIVALUE
                    v = list(value)
                    if len(v) == 0:
                        this_type = none_type.__name__
                    elif len(v) == 1:
                        this_type = v[0].__class__.__name__
                    else:
                        this_type = reduce(
                            _merge_python_type, (vi.__class__.__name__ for vi in value)
                        )
                else:
                    this_type = value.__class__.__name__
                column.es_type = _merge_python_type(column.es_type, this_type)
                column.jx_type = python_type_to_json_type[column.es_type]

                if this_type in {"object", "dict", "Mapping", "Data"}:
                    _get_schema_from_list(
                        [value], table_name, full_name, nested_path, columns
                    )
                elif this_type in {"list", "FlatList"}:
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(
                        value, table_name, full_name, newpath, columns
                    )
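When a list-valued field is found, the example prepends a deeper path onto nested_path before recursing. Assuming split_field/join_field behave roughly like splitting and joining on ".", which is only an approximation of the mo_dots helpers, the bookkeeping reduces to this sketch:

# Path bookkeeping sketch; assumes "."-separated field paths.
nested_path = ["."]                   # innermost table first
name = "comments"                     # the list-valued field that triggers recursion

head = nested_path[0]
parts = [] if head == "." else head.split(".")
newpath = [".".join(parts + [name])] + nested_path
print(newpath)                        # ['comments', '.']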
Example #52
0
    def update(self, command):
        self.dirty = True
        try:
            command = wrap(command)
            DEBUG and Log.note(
                "Update {{timestamp}}: {{command|json}}",
                command=command,
                timestamp=Date(command["set"].last_updated),
            )
            eq = command.where.eq
            if eq.es_index:
                if len(eq) == 1:
                    if unwraplist(command.clear) == ".":
                        with self.locker:
                            del self.data[eq.es_index]
                        self.todo.add(
                            (
                                EXECUTE,
                                "DELETE FROM "
                                + db_table_name
                                + SQL_WHERE
                                + " es_index="
                                + quote_value(eq.es_index),
                            )
                        )
                        return

                    # FASTEST
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [c for cs in all_columns for c in cs]
                elif eq.es_column and len(eq) == 2:
                    # FASTER
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [
                            c
                            for cs in all_columns
                            for c in cs
                            if c.es_column == eq.es_column
                        ]

                else:
                    # SLOWER
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [
                            c
                            for cs in all_columns
                            for c in cs
                            if all(
                                c[k] == v for k, v in eq.items()
                            )  # THIS LINE IS VERY SLOW
                        ]
            else:
                columns = list(self)
                columns = jx.filter(columns, command.where)

            with self.locker:
                for col in columns:
                    DEBUG and Log.note(
                        "update column {{table}}.{{column}}",
                        table=col.es_index,
                        column=col.es_column,
                    )
                    for k in command["clear"]:
                        if k == ".":
                            self.todo.add((DELETE, col))
                            lst = self.data[col.es_index]
                            cols = lst[col.name]
                            cols.remove(col)
                            if len(cols) == 0:
                                del lst[col.name]
                                if len(lst) == 0:
                                    del self.data[col.es_index]
                            break
                        else:
                            col[k] = None
                    else:
                        # DID NOT DELETE COLUMN ("."), CONTINUE TO SET PROPERTIES
                        for k, v in command.set.items():
                            col[k] = v
                        self.todo.add((UPDATE, col))

        except Exception as e:
            Log.error("should not happen", cause=e)
Example #53
0
    def get_value_from_row(self, row):
        values = row[self.start]['key'].replace("||", "\b").split("|")
        if len(values) == 2:
            return None
        return unwraplist([v.replace("\b", "|") for v in values[1:-1]])
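The key being parsed above packs several values with "|" as a separator, with "||" standing in for a literal pipe inside a value; the backspace character is only a temporary sentinel. A round-trip sketch of just that escaping (the real key also carries a leading and trailing component, which the method discards with values[1:-1]; the sample key here is made up):

# Escaping round trip, for illustration only.
def unpack(key):
    parts = key.replace("||", "\b").split("|")
    return [p.replace("\b", "|") for p in parts]

print(unpack("prefix|has||pipe|suffix"))
# ['prefix', 'has|pipe', 'suffix']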
Example #54
0
def selector(d):
    output = Data()
    for n, p in push_and_pull:
        output[n] = unwraplist(p(wrap(d)))
    return unwrap(output)
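push_and_pull above is a sequence of (output name, accessor) pairs; each accessor pulls one value out of the wrapped row and unwraplist collapses single-element results. A plain-dict sketch with hypothetical accessors and a made-up row:

# Plain-dict sketch; the accessors and the sample row are hypothetical.
push_and_pull = [
    ("name", lambda d: d.get("user", {}).get("name")),
    ("tag",  lambda d: d.get("tags")),
]

def selector_sketch(d):
    out = {}
    for n, p in push_and_pull:
        v = p(d)
        out[n] = v[0] if isinstance(v, list) and len(v) == 1 else v   # unwraplist-style collapse
    return out

print(selector_sketch({"user": {"name": "alice"}, "tags": ["triaged"]}))
# {'name': 'alice', 'tag': 'triaged'}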
Example #55
0
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized edge
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param schema: for context
    :return: a normalized edge
    """
    if not _Column:
        _late_import()

    if edge == None:
        Log.error("Edge has no value, or expression is empty")
    elif isinstance(edge, text_type):
        if schema:
            leaves = unwraplist(list(schema.leaves(edge)))
            if not leaves or isinstance(leaves, (list, set)):
                return [
                    Data(
                        name=edge,
                        value=jx_expression(edge, schema=schema),
                        allowNulls=True,
                        dim=dim_index
                    )
                ]
            elif isinstance(leaves, _Column):
                return [Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=_normalize_domain(domain=leaves, limit=limit, schema=schema)
                )]
            elif isinstance(leaves.fields, list) and len(leaves.fields) == 1:
                return [Data(
                    name=leaves.name,
                    value=jx_expression(leaves.fields[0], schema=schema),
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
            else:
                return [Data(
                    name=leaves.name,
                    allowNulls=True,
                    dim=dim_index,
                    domain=leaves.getDomain()
                )]
        else:
            return [
                Data(
                    name=edge,
                    value=jx_expression(edge, schema=schema),
                    allowNulls=True,
                    dim=dim_index
                )
            ]
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, text_type):
            Log.error("You must name compound and complex edges: {{edge}}", edge=edge)

        if isinstance(edge.value, (list, set)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [Data(
                name=edge.name,
                value=jx_expression(edge.value, schema=schema),
                allowNulls=bool(coalesce(edge.allowNulls, True)),
                dim=dim_index,
                domain=domain
            )]

        domain = _normalize_domain(edge.domain, schema=schema)

        return [Data(
            name=coalesce(edge.name, edge.value),
            value=jx_expression(edge.value, schema=schema),
            range=_normalize_range(edge.range),
            allowNulls=bool(coalesce(edge.allowNulls, True)),
            dim=dim_index,
            domain=domain
        )]
Example #56
0
def tuid_endpoint(path):
    with RegisterThread():
        try:
            service.statsdaemon.update_requests(requests_total=1)

            if flask.request.headers.get("content-length", "") in ["", "0"]:
                # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    EXPECTING_QUERY,
                    status=400,
                    headers={
                        "Content-Type": "text/html"
                    }
                )
            elif int(flask.request.headers["content-length"]) > QUERY_SIZE_LIMIT:
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    unicode2utf8("request too large"),
                    status=400,
                    headers={
                        "Content-Type": "text/html"
                    }
                )
            request_body = flask.request.get_data().strip()
            query = json2value(utf82unicode(request_body))

            # ENSURE THE QUERY HAS THE CORRECT FORM
            if query['from'] != 'files':
                Log.error("Can only handle queries on the `files` table")

            ands = listwrap(query.where['and'])
            if len(ands) != 3:
                Log.error(
                    'expecting a simple where clause with the following structure\n{{example|json}}',
                    example={"and": [
                        {"eq": {"branch": "<BRANCH>"}},
                        {"eq": {"revision": "<REVISION>"}},
                        {"in": {"path": ["<path1>", "<path2>", "...", "<pathN>"]}}
                    ]}
                )

            rev = None
            paths = None
            branch_name = None
            for a in ands:
                rev = coalesce(rev, a.eq.revision)
                paths = unwraplist(coalesce(paths, a['in'].path, a.eq.path))
                branch_name = coalesce(branch_name, a.eq.branch)
            paths = listwrap(paths)

            if len(paths) == 0:
                response, completed = [], True
            elif service.conn.pending_transactions > TOO_BUSY:  # CHECK IF service IS VERY BUSY
                # TODO:  BE SURE TO UPDATE STATS TOO
                Log.note("Too many open transactions")
                response, completed = [], False
            elif service.get_thread_count() > TOO_MANY_THREADS:
                Log.note("Too many threads open")
                response, completed = [], False
            else:
                # RETURN TUIDS
                with Timer("tuid internal response time for {{num}} files", {"num": len(paths)}):
                    response, completed = service.get_tuids_from_files(
                        revision=rev, files=paths, going_forward=True, repo=branch_name
                    )

                if not completed:
                    Log.note(
                        "Request for {{num}} files is incomplete for revision {{rev}}.",
                        num=len(paths), rev=rev
                    )

            if query.meta.format == 'list':
                formatter = _stream_list
            else:
                formatter = _stream_table

            service.statsdaemon.update_requests(
                requests_complete=1 if completed else 0,
                requests_incomplete=1 if not completed else 0,
                requests_passed=1
            )

            return Response(
                formatter(response),
                status=200 if completed else 202,
                headers={
                    "Content-Type": "application/json"
                }
            )
        except Exception as e:
            e = Except.wrap(e)
            service.statsdaemon.update_requests(requests_incomplete=1, requests_failed=1)
            Log.warning("could not handle request", cause=e)
            return Response(
                unicode2utf8(value2json(e, pretty=True)),
                status=400,
                headers={
                    "Content-Type": "text/html"
                }
            )
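A client hitting this endpoint would POST a body of exactly the shape the handler validates: a query on the files table with a three-clause "and". The sketch below assumes the requests package is available and uses a placeholder URL, branch and path; the revision placeholder is left unfilled.

# Hypothetical client call; URL, branch and path are placeholders, not taken from the service config.
import json
import requests

query = {
    "from": "files",
    "where": {"and": [
        {"eq": {"branch": "mozilla-central"}},
        {"eq": {"revision": "<REVISION>"}},
        {"in": {"path": ["testing/web-platform/meta/MANIFEST.json"]}},
    ]},
    "meta": {"format": "list"},
}
response = requests.post("http://localhost:5000/tuid", data=json.dumps(query))
print(response.status_code)   # 200 when complete, 202 when the service is still catching up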
Example #57
0
    def __data__(self):
        output = Data({k: getattr(self, k) for k in vars(self)})
        output.cause = unwraplist([c.__data__() for c in listwrap(output.cause)])
        return output
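The unwraplist call in this last example means a single cause serializes as an object while several causes stay a list. A dict-only sketch of that collapse, with Except and Data replaced by plain dicts:

# Cause collapse, for illustration only.
error = {"template": "outer failure", "cause": [{"template": "inner failure"}]}

causes = error["cause"]
error["cause"] = causes[0] if len(causes) == 1 else causes
print(error)
# {'template': 'outer failure', 'cause': {'template': 'inner failure'}}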