Example #1
    def test_status(self, log):
        self.stats.action.test_status += 1
        if not log.test:
            Log.error(
                "log has blank 'test' property! Do not know how to handle.")

        self.logs[literal_field(log.test)] += [log]
        test = self.tests[literal_field(log.test)]
        if not test:
            self.tests[literal_field(log.test)] = test = Dict(
                test=log.test, start_time=log.time, missing_test_start=True)
        test.stats.action.test_status += 1
        test.last_log_time = log.time
        test.stats[log.status.lower()] += 1

        if log.subtest:
            test.subtests += [{
                "name": log.subtest,
                "subtest": log.subtest,
                "ok": log.expected == None or log.expected == log.status,
                "status": log.status.lower(),
                "expected": log.expected.lower(),
                "timestamp": log.time,
                "message": log.message,
                "ordering": len(test.subtests)
            }]
Example #2
 def leaves(self, prefix=None):
     """
     LIKE items() BUT RECURSIVE, AND ONLY FOR THE LEAVES (non dict) VALUES
     """
     prefix = coalesce(prefix, "")
     output = []
     for k, v in self.items():
         if isinstance(v, Mapping):
             output.extend(wrap(v).leaves(prefix=prefix + literal_field(k) + "."))
         else:
             output.append((prefix + literal_field(k), v))
     return output
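Every example in this file turns on literal_field, which escapes the dots in a key so path-aware containers treat it as one segment. A minimal sketch of that behaviour, assuming the escaping is a plain "." to "\." replacement (this stand-in is not the pyLibrary source):

def literal_field_sketch(field):
    # hypothetical stand-in for pyLibrary's literal_field: escape dots so a
    # key like "build.type" stays one segment instead of the path build -> type
    return field.replace(".", "\\.")

print(literal_field_sketch("build.type"))  # build\.type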
Example #3
 def leaves(self, prefix=None):
     """
     LIKE items() BUT RECURSIVE, AND ONLY FOR THE LEAVES (non dict) VALUES
     """
     prefix = coalesce(prefix, "")
     output = []
     for k, v in self.items():
         if isinstance(v, Mapping):
             output.extend(
                 wrap(v).leaves(prefix=prefix + literal_field(k) + "."))
         else:
             output.append((prefix + literal_field(k), v))
     return output
Example #4
            def mainthread_transform(r):
                if r == None:
                    return None

                output = Dict()

                for i in r.mainthread_readbytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readbytes = i[0]
                r.mainthread_readbytes = None

                for i in r.mainthread_writebytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writebytes = i[0]
                r.mainthread_writebytes = None

                for i in r.mainthread_readcount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readcount = i[0]
                r.mainthread_readcount = None

                for i in r.mainthread_writecount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writecount = i[0]
                r.mainthread_writecount = None

                r.mainthread = output.values()
Example #5
            def mainthread_transform(r):
                if r == None:
                    return None

                output = Dict()

                for i in r.mainthread_readbytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readbytes = i[0]
                r.mainthread_readbytes = None

                for i in r.mainthread_writebytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writebytes = i[0]
                r.mainthread_writebytes = None

                for i in r.mainthread_readcount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readcount = i[0]
                r.mainthread_readcount = None

                for i in r.mainthread_writecount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writecount = i[0]
                r.mainthread_writecount = None

                r.mainthread = output.values()
Example #6
    def _index_values(self, doc, start_index, parent_index=-1, prefix=""):
        curr_index = doc[INDEX] = start_index
        doc[PARENT] = parent_index
        _index = self._index

        for k, v in doc.items():
            k = literal_field(k)
            _type = _type_map[v.__class__]
            if _type == "object":
                self._index_values(v, start_index, prefix=k + ".")
                v = "."
            elif _type == "nested":
                for vv in v:
                    curr_index = self._index_values(vv, curr_index + 1, start_index, prefix=k + ".")
                _type = "object"
                v = "."

            typed_key = k + ".$" + _type
            i = _index.get(typed_key)
            if i is None:
                i = _index[typed_key] = {}
            j = i.get(v)
            if j is None:
                j = i[v] = set()
            j |= {start_index}
        return curr_index
Example #7
    def _index_values(self, doc, start_index, parent_index=-1, prefix=""):
        curr_index = doc[INDEX] = start_index
        doc[PARENT] = parent_index
        _index = self._index

        for k, v in doc.items():
            k = literal_field(k)
            _type = _type_map[v.__class__]
            if _type == "object":
                self._index_values(v, start_index, prefix=k + ".")
                v = "."
            elif _type == "nested":
                for vv in v:
                    curr_index = self._index_values(vv,
                                                    curr_index + 1,
                                                    start_index,
                                                    prefix=k + ".")
                _type = "object"
                v = "."

            typed_key = k + ".$" + _type
            i = _index.get(typed_key)
            if i is None:
                i = _index[typed_key] = {}
            j = i.get(v)
            if j is None:
                j = i[v] = set()
            j |= {start_index}
        return curr_index
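A hedged sketch of the structure _index_values accumulates in self._index above: one entry per typed key, mapping each value seen to the set of root-document indexes that contain it, with object and nested values collapsed to ".". The field names, type suffixes, and indexes below are hypothetical:

index_sketch = {
    "status.$string": {"ok": {0, 3}, "fail": {1}},  # leaf value -> root doc indexes
    "build\\.props.$object": {".": {0, 1, 3}},      # literal_field-escaped key; object collapsed to "."
}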
Example #8
def format_list(T, select, source):
    data = []
    for row in T:
        r = Dict()
        for s in select:
            if s.value == ".":
                r[s.name] = row[source]
            else:
                if source == "_source":
                    r[s.name] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[s.name] = unwraplist(row[source][literal_field(s.value)])
                else:
                    r[s.name] = unwraplist(row[source][literal_field(s.name)])
        data.append(r)
    return Dict(meta={"format": "list"}, data=data)
Example #9
def es_setop(es, query):
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.fields = DictList()
    es_query.sort = qb_sort_to_es_sort(query.sort)
    source = "fields"
    for s in select:
        if s.value == "*":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif s.value == ".":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            es_query.fields.append(s.value)
        elif isinstance(s.value, list) and es_query.fields is not None:
            es_query.fields.extend(s.value)
        else:
            es_query.script_fields[literal_field(s.name)] = {"script": qb_expression_to_ruby(s.value)}

    return extract_rows(es, es_query, source, select, query)
Example #10
def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta=None):
    show_detail = True
    try:
        if test == None and expected == None:
            return
        elif isinstance(test, UniqueIndex):
            if test ^ expected:
                Log.error("Sets do not match")
        elif isinstance(expected, Mapping):
            for k, v2 in expected.items():
                if isinstance(k, basestring):
                    v1 = dot.get_attr(test, literal_field(k))
                else:
                    show_detail = False
                    v1 = test[k]
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif isinstance(test, set) and isinstance(expected, set):
            if test ^ expected:
                Log.error("Sets do not match")
        elif isinstance(expected, types.FunctionType):
            return expected(test)
        elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"):
            for a, b in zipall(test, expected):
                assertAlmostEqual(a, b, msg=msg, digits=digits, places=places, delta=delta)
        else:
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
    except Exception as e:
        Log.error(
            "{{test|json}} does not match expected {{expected|json}}",
            test=test if show_detail else "[can not show]",
            expected=expected if show_detail else "[can not show]",
            cause=e
        )
Example #11
def es_setop(es, query):
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.fields = DictList()
    es_query.sort = qb_sort_to_es_sort(query.sort)
    source = "fields"
    for s in select:
        if s.value == "*":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif s.value == ".":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            es_query.fields.append(s.value)
        elif isinstance(s.value, list) and es_query.fields is not None:
            es_query.fields.extend(s.value)
        else:
            es_query.script_fields[literal_field(s.name)] = {
                "script": qb_expression_to_ruby(s.value)
            }

    return extract_rows(es, es_query, source, select, query)
Example #12
    def test_end(self, log):
        self.logs[literal_field(log.test)] += [log]
        test = self.tests[literal_field(log.test)]
        if not test:
            self.tests[literal_field(log.test)] = test = Dict(
                test=log.test, start_time=log.time, missing_test_start=True)

        test.ok = log.expected == None or log.expected == log.status
        if not all(test.subtests.ok):
            test.ok = False
        test.result = log.status  #TODO: REMOVE ME AFTER November 2015
        test.status = log.status
        test.expected = coalesce(log.expected, log.status)
        test.end_time = log.time
        test.duration = coalesce(test.end_time - test.start_time,
                                 log.extra.runtime)
        test.extra = log.extra
Example #13
def leaves(value, prefix=None):
    """
    LIKE items() BUT RECURSIVE, AND ONLY FOR THE LEAVES (non dict) VALUES
    :param value: THE Mapping TO TRAVERSE
    :param prefix:  OPTIONAL PREFIX GIVEN TO EACH KEY
    :return: LIST OF (PATH, VALUE) PAIRS, WITH EACH PATH POINTING INTO THE value TREE
    """
    prefix = coalesce(prefix, "")
    output = []
    for k, v in value.items():
        try:
            if isinstance(v, Mapping):
                output.extend(leaves(v, prefix=prefix + literal_field(k) + "."))
            else:
                output.append((prefix + literal_field(k), unwrap(v)))
        except Exception as e:
            from pyLibrary.debugs.logs import Log

            Log.error("Do not know how to handle", cause=e)
    return output
Example #14
    def log(self, log):
        self.stats.action.log += 1
        if not log.test:
            return

        self.logs[literal_field(log.test)] += [log]
        test = self.tests[literal_field(log.test)]
        if not test:
            self.tests[literal_field(log.test)] = test = wrap({
                "test": log.test,
                "start_time": log.time,
                "missing_test_start": True,
            })
        test.last_log_time = log.time
        test.stats.action.log += 1
Example #15
    def crash(self, log):
        self.stats.action.crash += 1
        if not log.test:
            return

        self.logs[literal_field(log.test)] += [log]
        test = self.tests[literal_field(log.test)]
        if not test:
            self.tests[literal_field(log.test)] = test = Dict(
                test=log.test,
                start_time=log.time,
                crash=True,
                missing_test_start=True)

        test.ok = False
        test.result = log.status  #TODO: REMOVE ME AFTER November 2015
        test.status = log.status
        test.last_log_time = log.time
        test.missing_test_end = True
Example #16
def format_list(T, select, source):
    data = []
    for row in T:
        r = Dict()
        for s in select:
            if s.value == ".":
                r[s.name] = row[source]
            else:
                if source == "_source":
                    r[s.name] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[s.name] = unwraplist(row[source][literal_field(s.value)])
                else:
                    r[s.name] = unwraplist(row[source][literal_field(s.name)])
        data.append(r)
    return Dict(
        meta={"format": "list"},
        data=data
    )
Example #17
def _merge_mapping(a, b):
    """
    MERGE TWO MAPPINGS, a TAKES PRECEDENCE
    """
    for name, b_details in b.items():
        a_details = a[literal_field(name)]
        if a_details.properties and not a_details.type:
            a_details.type = "object"
        if b_details.properties and not b_details.type:
            b_details.type = "object"

        if a_details:
            a_details.type = _merge_type[a_details.type][b_details.type]

            if b_details.type in ["object", "nested"]:
                _merge_mapping(a_details.properties, b_details.properties)
        else:
            a[literal_field(name)] = deepcopy(b_details)

    return a
Example #18
 def inners():
     for t in data.hits.hits:
         for i in t.inner_hits[literal_field(query_path)].hits.hits:
             t._inner = i._source
             for k, e in post_expressions.items():
                 t[k] = e(t)
             yield t
     if more_filter:
         Thread.join(need_more)
         for t in more[0].hits.hits:
             yield t
Example #19
 def inners():
     for t in data.hits.hits:
         for i in t.inner_hits[literal_field(query_path)].hits.hits:
             t._inner = i._source
             for k, e in post_expressions.items():
                 t[k] = e(t)
             yield t
     if more_filter:
         Thread.join(need_more)
         for t in more[0].hits.hits:
             yield t
Example #20
def assertAlmostEqual(test,
                      expected,
                      digits=None,
                      places=None,
                      msg=None,
                      delta=None):
    show_detail = True
    try:
        if test == None and expected == None:
            return
        elif isinstance(test, UniqueIndex):
            if test ^ expected:
                Log.error("Sets do not match")
        elif isinstance(expected, Mapping):
            for k, v2 in expected.items():
                if isinstance(k, basestring):
                    v1 = dot.get_attr(test, literal_field(k))
                else:
                    show_detail = False
                    v1 = test[k]
                assertAlmostEqual(v1,
                                  v2,
                                  msg=msg,
                                  digits=digits,
                                  places=places,
                                  delta=delta)
        elif isinstance(test, (set, list)) and isinstance(expected, set):
            test = set(test)
            if len(test) != len(expected):
                Log.error(
                    "Sets do not match, element count different:\n{{test|json|indent}}\nexpecting{{expectedtest|json|indent}}",
                    test=test,
                    expected=expected)

            for e in expected:
                for t in test:
                    try:
                        assertAlmostEqual(t,
                                          e,
                                          msg=msg,
                                          digits=digits,
                                          places=places,
                                          delta=delta)
                        break
                    except Exception as _:
                        pass
                else:
                    Log.error(
                        "Sets do not match. {{value|json}} not found in {{test|json}}",
                        value=e,
                        test=test)

        elif isinstance(expected, types.FunctionType):
            return expected(test)
Example #21
def _merge_mapping(a, b):
    """
    MERGE TWO MAPPINGS, a TAKES PRECEDENCE
    """
    for name, b_details in b.items():
        a_details = a[literal_field(name)]
        if a_details.properties and not a_details.type:
            a_details.type = "object"
        if b_details.properties and not b_details.type:
            b_details.type = "object"

        if a_details:
            a_details.type = _merge_type[a_details.type][b_details.type]

            if b_details.type in ["object", "nested"]:
                _merge_mapping(a_details.properties, b_details.properties)
        else:
            a[literal_field(name)] = deepcopy(b_details)

    return a
Example #22
def leaves(value, prefix=None):
    """
    LIKE items() BUT RECURSIVE, AND ONLY FOR THE LEAVES (non dict) VALUES
    :param value: THE Mapping TO TRAVERSE
    :param prefix:  OPTIONAL PREFIX GIVEN TO EACH KEY
    :return: LIST OF (PATH, VALUE) PAIRS, WITH EACH PATH POINTING INTO THE value TREE
    """
    prefix = coalesce(prefix, "")
    output = []
    for k, v in value.items():
        try:
            if isinstance(v, Mapping):
                output.extend(leaves(v,
                                     prefix=prefix + literal_field(k) + "."))
            else:
                output.append((prefix + literal_field(k), unwrap(v)))
        except Exception as e:
            from pyLibrary.debugs.logs import Log

            Log.error("Do not know how to handle", cause=e)
    return output
Example #23
    def extend(self, documents):
        parts = Dict()
        for d in wrap(documents):
            parent_key = etl2key(key2etl(d.id).source)
            d.value._id = d.id
            parts[literal_field(parent_key)] += [d.value]

        for k, docs in parts.items():
            self._extend(k, docs)

        return parts.keys()
Example #24
    def extend(self, documents):
        parts = Dict()
        for d in wrap(documents):
            parent_key = etl2key(key2etl(d.id).source)
            d.value._id = d.id
            parts[literal_field(parent_key)] += [d.value]

        for k, docs in parts.items():
            self._extend(k, docs)

        return parts.keys()
Example #25
def format_table(T, select, source):
    header = [s.name for s in select]
    map = {s.name: i
           for i, s in enumerate(select)}  # MAP FROM name TO COLUMN INDEX
    data = []
    for row in T:
        r = [None] * len(header)
        for s in select:
            if s.value == ".":
                r[map[s.name]] = row[source]
            else:
                if source == "_source":
                    r[map[s.name]] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[map[s.name]] = unwraplist(row[source][literal_field(
                        s.value)])
                else:
                    r[map[s.name]] = unwraplist(row[source][literal_field(
                        s.name)])
        data.append(r)
    return Dict(meta={"format": "table"}, header=header, data=data)
Example #26
    def update(self, command):
        """
        EXPECTING command == {"set":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS AN ES FILTER
        """
        command = wrap(command)
        schema = self._es.get_schema()

        # GET IDS OF DOCUMENTS
        results = self._es.search(
            {
                "fields": listwrap(schema._routing.path),
                "query": {
                    "filtered": {"query": {"match_all": {}}, "filter": jx_expression(command.where).to_esfilter()}
                },
                "size": 200000,
            }
        )

        # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
        scripts = DictList()
        for k, v in command.set.items():
            if not is_keyword(k):
                Log.error("Only support simple paths for now")
            if isinstance(v, Mapping) and v.doc:
                scripts.append({"doc": v.doc})
            else:
                scripts.append({"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()})

        if results.hits.hits:
            updates = []
            for h in results.hits.hits:
                for s in scripts:
                    updates.append(
                        {
                            "update": {
                                "_id": h._id,
                                "_routing": unwraplist(h.fields[literal_field(schema._routing.path)]),
                            }
                        }
                    )
                    updates.append(s)
            content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode("utf-8")
            response = self._es.cluster.post(
                self._es.path + "/_bulk", data=content, headers={"Content-Type": "application/json"}
            )
            if response.errors:
                Log.error(
                    "could not update: {{error}}",
                    error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)],
                )
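A hedged sketch of the command shape that update() documents above. The field name and filter are hypothetical, and the where clause is written as a JSON query expression because the code passes it through jx_expression(...).to_esfilter():

command = {
    "set": {"build.type": "debug"},                  # names mapped to constant values
    "where": {"eq": {"branch": "mozilla-central"}},  # hypothetical filter
}
container.update(command)  # `container` stands for an instance of the class above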
Example #27
def format_table(T, select, source):
    header = [s.name for s in select]
    map = {s.name: i for i, s in enumerate(select)}  # MAP FROM name TO COLUMN INDEX
    data = []
    for row in T:
        r = [None] * len(header)
        for s in select:
            if s.value == ".":
                r[map[s.name]] = row[source]
            else:
                if source == "_source":
                    r[map[s.name]] = unwraplist(row[source][s.value])
                elif isinstance(s.value, basestring):  # fields
                    r[map[s.name]] = unwraplist(row[source][literal_field(s.value)])
                else:
                    r[map[s.name]] = unwraplist(row[source][literal_field(s.name)])
        data.append(r)
    return Dict(
        meta={"format": "table"},
        header=header,
        data=data
    )
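A hedged sketch of the two output shapes built by format_list and format_table above, written with plain dicts instead of pyLibrary's Dict; the sample values are hypothetical:

list_form = {
    "meta": {"format": "list"},
    "data": [{"name": "build.type", "count": 3}],  # one record per row
}
table_form = {
    "meta": {"format": "table"},
    "header": ["name", "count"],                   # column order taken from `select`
    "data": [["build.type", 3]],                   # one list per row
}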
Example #28
def assertAlmostEqual(test,
                      expected,
                      digits=None,
                      places=None,
                      msg=None,
                      delta=None):
    show_detail = True
    try:
        if test == None and expected == None:
            return
        elif isinstance(test, UniqueIndex):
            if test ^ expected:
                Log.error("Sets do not match")
        elif isinstance(expected, Mapping):
            for k, v2 in expected.items():
                if isinstance(k, basestring):
                    v1 = dot.get_attr(test, literal_field(k))
                else:
                    show_detail = False
                    v1 = test[k]
                assertAlmostEqual(v1,
                                  v2,
                                  msg=msg,
                                  digits=digits,
                                  places=places,
                                  delta=delta)
        elif isinstance(test, set) and isinstance(expected, set):
            if test ^ expected:
                Log.error("Sets do not match")
        elif isinstance(expected, types.FunctionType):
            return expected(test)
        elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"):
            for a, b in zipall(test, expected):
                assertAlmostEqual(a,
                                  b,
                                  msg=msg,
                                  digits=digits,
                                  places=places,
                                  delta=delta)
        else:
            assertAlmostEqualValue(test,
                                   expected,
                                   msg=msg,
                                   digits=digits,
                                   places=places,
                                   delta=delta)
    except Exception as e:
        Log.error("{{test|json}} does not match expected {{expected|json}}",
                  test=test if show_detail else "[can not show]",
                  expected=expected if show_detail else "[can not show]",
                  cause=e)
Example #29
def add_to_queue(work_queue, redo, bucket_name):
    now = Date.now()
    for r in redo:
        k = literal_field(r)
        counter[k] += 1
        if counter[k] > 3:
            Log.error("Problem backfilling {{key}}: Tried >=3 times, giving up",  key= r)
            continue

        work_queue.add({
            "bucket": bucket_name,
            "key": r,
            "timestamp": now.unix,
            "date/time": now.format()
        })
Example #30
def add_to_queue(work_queue, redo, bucket_name):
    now = Date.now()
    for r in redo:
        k = literal_field(r)
        counter[k] += 1
        if counter[k] > 3:
            Log.error(
                "Problem backfilling {{key}}: Tried >=3 times, giving up",
                key=r)
            continue

        work_queue.add({
            "bucket": bucket_name,
            "key": r,
            "timestamp": now.unix,
            "date/time": now.format()
        })
Example #31
 def __init__(self, *args, **kwargs):
     """
     CALLING Dict(**something) WILL RESULT IN A COPY OF something, WHICH
     IS UNLIKELY TO BE USEFUL. USE wrap() INSTEAD
     """
     if DEBUG:
         d = _get(self, "_dict")
         for k, v in kwargs.items():
             d[literal_field(k)] = unwrap(v)
     else:
         if args:
             args0 = args[0]
             if isinstance(args0, Mapping):
                 _set(self, "_dict", args0)
             else:
                 _set(self, "_dict", _get(args[0], "__dict__"))
         elif kwargs:
             _set(self, "_dict", unwrap(kwargs))
         else:
             _set(self, "_dict", {})
Example #32
 def __init__(self, *args, **kwargs):
     """
     CALLING Dict(**something) WILL RESULT IN A COPY OF something, WHICH
     IS UNLIKELY TO BE USEFUL. USE wrap() INSTEAD
     """
     if DEBUG:
         d = _get(self, "_dict")
         for k, v in kwargs.items():
             d[literal_field(k)] = unwrap(v)
     else:
         if args:
             args0 = args[0]
             if isinstance(args0, Mapping):
                 _set(self, "_dict", args0)
             else:
                 _set(self, "_dict", _get(args[0], "__dict__"))
         elif kwargs:
             _set(self, "_dict", unwrap(kwargs))
         else:
             _set(self, "_dict", {})
Example #33
def es_aggop(es, mvel, query):
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()  # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if is_keyword(s.value):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            else:
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script":
                        es09.expressions.compile_expression(s.value, query)
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    matricies = {
        s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[
            aggregates[s.aggregate]])
        for s in select
    }
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
Example #34
    def __setitem__(self, key, value):
        if key == "":
            from pyLibrary.debugs.logs import Log

            Log.error("key is empty string.  Probably a bad idea")
        if key == None:
            return Null
        if key == ".":
            # SOMETHING TERRIBLE HAPPENS WHEN value IS NOT A Mapping;
            # HOPEFULLY THE ONLY OTHER METHOD RUN ON self IS unwrap()
            v = unwrap(value)
            _set(self, "_dict", v)
            return v
        if isinstance(key, str):
            key = key.decode("utf8")

        try:
            d = _get(self, "_dict")
            value = unwrap(value)
            if key.find(".") == -1:
                if value is None:
                    d.pop(key, None)
                else:
                    d[key] = value
                return self

            seq = _split_field(key)
            for k in seq[:-1]:
                d = _getdefault(d, k)
            if value == None:
                d.pop(seq[-1], None)
            elif d == None:
                d[literal_field(seq[-1])] = value
            else:
                d[seq[-1]] = value
            return self
        except Exception as e:
            raise e
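A hedged sketch of the behaviour __setitem__ above implies: a dotted key is split into a path, while a literal_field-escaped key keeps the dot inside a single key. Imports are assumed, and the final comment shows the implied underlying dict rather than captured output:

from pyLibrary.dot import Dict, literal_field  # import path assumed

d = Dict()
d["build.type"] = "debug"                   # split on ".": creates {"build": {"type": ...}}
d[literal_field("build.type")] = "opt"      # escaped: one flat "build.type" key
# underlying dict: {"build": {"type": "debug"}, "build.type": "opt"}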
Example #35
    def __setitem__(self, key, value):
        if key == "":
            from pyLibrary.debugs.logs import Log

            Log.error("key is empty string.  Probably a bad idea")
        if key == None:
            return Null
        if key == ".":
            # SOMETHING TERRIBLE HAPPENS WHEN value IS NOT A Mapping;
            # HOPEFULLY THE ONLY OTHER METHOD RUN ON self IS unwrap()
            v = unwrap(value)
            _set(self, "_dict", v)
            return v
        if isinstance(key, str):
            key = key.decode("utf8")

        try:
            d = _get(self, "_dict")
            value = unwrap(value)
            if key.find(".") == -1:
                if value is None:
                    d.pop(key, None)
                else:
                    d[key] = value
                return self

            seq = _split_field(key)
            for k in seq[:-1]:
                d = _getdefault(d, k)
            if value == None:
                d.pop(seq[-1], None)
            elif d == None:
                d[literal_field(seq[-1])] = value
            else:
                d[seq[-1]] = value
            return self
        except Exception as e:
            raise e
Example #36
def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta=None):
    show_detail = True
    try:
        if test == None and expected == None:
            return
        elif isinstance(test, UniqueIndex):
            if test ^ expected:
                Log.error("Sets do not match")
        elif isinstance(expected, Mapping):
            for k, v2 in expected.items():
                if isinstance(k, basestring):
                    v1 = dot.get_attr(test, literal_field(k))
                else:
                    show_detail = False
                    v1 = test[k]
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif isinstance(test, (set, list)) and isinstance(expected, set):
            test = set(test)
            if len(test) != len(expected):
                Log.error(
                    "Sets do not match, element count different:\n{{test|json|indent}}\nexpecting{{expectedtest|json|indent}}",
                    test=test,
                    expected=expected
                )

            for e in expected:
                for t in test:
                    try:
                        assertAlmostEqual(t, e, msg=msg, digits=digits, places=places, delta=delta)
                        break
                    except Exception as _:
                        pass
                else:
                    Log.error("Sets do not match. {{value|json}} not found in {{test|json}}", value=e, test=test)

        elif isinstance(expected, types.FunctionType):
            return expected(test)
Example #37
def es_aggop(es, mvel, query):
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD


    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()   # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if isinstance(s.value, Variable):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value.var
                    },
                    "facet_filter": simplify_esfilter(query.where.to_esfilter())
                }
            else:
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script": jx_expression_to_function(s.value)
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    matricies = {s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[aggregates[s.aggregate]]) for s in select}
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
Example #38
def es_terms_stats(esq, mvel, query):
    select = listwrap(query.select)
    facetEdges = []    # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = DictList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: self can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_keyword(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found",  real_count= len(esFacets),  theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, DictList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error("not implemented yet")  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = DictList()
            constants = DictList()
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({"name": fedge.domain.name, "value": parts[f]})
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_keyword(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_keyword(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})

    data = es09.util.post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = DictList()
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if stats not in map:
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (special.dataIndex,) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
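A hedged sketch of the facet-name round trip used in es_terms_stats above: the select name (dot-escaped by literal_field) and the dataIndex of each facet-edge partition are comma-joined when the facets are requested, then split apart again when the cube is filled. The name and indexes are hypothetical:

select_name = "failure.rate".replace(".", "\\.")   # what literal_field is assumed to do
facet_name = ",".join([select_name, "0", "2"])     # -> "failure\.rate,0,2"
temp = facet_name.split(",")
sname, pre_coord = temp[0], tuple(int(c) for c in temp[1:])
# sname == "failure\.rate", pre_coord == (0, 2)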
Example #39
def es_aggsop(es, frum, query):
    select = listwrap(query.select)

    es_query = Dict()
    new_select = Dict()
    formula = []
    for s in select:
        if s.aggregate == "count" and (s.value == None or s.value == "."):
            s.pull = "doc_count"
        elif is_keyword(s.value):
            new_select[literal_field(s.value)] += [s]
        else:
            formula.append(s)

    for field_name, many in new_select.items():
        if len(many) > 1:
            canonical_name = literal_field(many[0].name)
            es_query.aggs[canonical_name].stats.field = many[0].value
            for s in many:
                if s.aggregate == "count":
                    s.pull = canonical_name + ".count"
                else:
                    s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
        else:
            s = many[0]
            s.pull = literal_field(s.value) + ".value"
            es_query.aggs[literal_field(s.value)][aggregates1_4[s.aggregate]].field = s.value

    for i, s in enumerate(formula):
        new_select[unicode(i)] = s
        s.pull = literal_field(s.name) + ".value"
        es_query.aggs[literal_field(s.name)][aggregates1_4[s.aggregate]].script = qb_expression_to_ruby(s.value)

    decoders = [AggsDecoder(e, query) for e in coalesce(query.edges, query.groupby, [])]
    start = 0
    for d in decoders:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if query.where:
        filter = simplify_esfilter(query.where)
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter}, es_query)}
        )

    if len(split_field(frum.name)) > 1:
        es_query = wrap({
            "size": 0,
            "aggs": {"_nested": set_default({
                "nested": {
                    "path": join_field(split_field(frum.name)[1::])
                }
            }, es_query)}
        })

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
        if query.edges:
            output = formatter(decoders, result.aggregations, start, query, select)
        elif query.groupby:
            output = groupby_formatter(decoders, result.aggregations, start, query, select)
        else:
            output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.es_response_time = es_duration.seconds
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", e)
Example #40
def extract_rows(es, es_query, query):
    is_list = isinstance(query.select, list)
    select = wrap([s.copy() for s in listwrap(query.select)])
    new_select = DictList()
    column_names = set(
        c.name
        for c in query.frum.get_columns()
        if c.type not in ["object"] and (not c.nested_path or c.abs_name == c.nested_path)
    )
    source = "fields"

    i = 0
    for s in select:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if s.value == "*":
            es_query.fields = None
            source = "_source"

            net_columns = column_names - set(select.name)
            for n in net_columns:
                new_select.append({
                    "name": n,
                    "value": n,
                    "put": {"name": n, "index": i, "child": "."}
                })
                i += 1
        elif s.value == ".":
            es_query.fields = None
            source = "_source"

            new_select.append({
                "name": s.name,
                "value": s.value,
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1
        elif s.value == "_id":
            new_select.append({
                "name": s.name,
                "value": s.value,
                "pull": "_id",
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1
        elif isinstance(s.value, basestring) and s.value.endswith(".*") and is_keyword(s.value[:-2]):
            parent = s.value[:-1]
            prefix = len(parent)
            for c in column_names:
                if c.startswith(parent):
                    if es_query.fields is not None:
                        es_query.fields.append(c)

                    new_select.append({
                        "name": s.name + "." + c[prefix:],
                        "value": c,
                        "put": {"name": s.name + "." + c[prefix:], "index": i, "child": "."}
                    })
                    i += 1
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            parent = s.value + "."
            prefix = len(parent)
            net_columns = [c for c in column_names if c.startswith(parent)]
            if not net_columns:
                if es_query.fields is not None:
                    es_query.fields.append(s.value)
                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
            else:
                for n in net_columns:
                    if es_query.fields is not None:
                        es_query.fields.append(n)
                    new_select.append({
                        "name": s.name,
                        "value": n,
                        "put": {"name": s.name, "index": i, "child": n[prefix:]}
                    })
            i += 1
        elif isinstance(s.value, list):
            Log.error("need an example")
            if es_query.fields is not None:
                es_query.fields.extend([v for v in s.value])
        else:
            es_query.script_fields[literal_field(s.name)] = {"script": qb_expression(s.value).to_ruby()}
            new_select.append({
                "name": s.name,
                "pull": "fields." + literal_field(s.name),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    for n in new_select:
        if n.pull:
            continue
        if source == "_source":
            n.pull = join_field(["_source"] + split_field(n.value))
        else:
            n.pull = "fields." + literal_field(n.value)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(T, new_select, query)
        output.meta.es_response_time = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Example #41
 def process_output(self, log):
     self.logs[literal_field(log.test)] += [log]
     self.stats.action.process_output += 1
     pass
Example #42
 def __init__(self, edge, query):
     self.start = None
     self.edge = edge
     self.name = literal_field(self.edge.name)
Example #43
def es_aggsop(es, frum, query):
    select = wrap([s.copy() for s in listwrap(query.select)])
    es_column_map = {c.name: unwraplist(c.es_column) for c in frum.schema.all_columns}

    es_query = Dict()
    new_select = Dict()  #MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            s.pull = "doc_count"
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                if frum.typed:
                    # STATISTICAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        #TODO: HANDLE BOTH $value AND $objects TO COUNT
                        Log.error("do not know how to handle")
                    else:
                        s.value.var = "$value"
                        new_select["$value"] += [s]
                else:
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        #TODO:  WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT
                        Log.error("do not know how to handle")
                    else:
                        Log.error('Not expecting ES to have a value at "." which {{agg}} can be applied', agg=s.aggregate)
            elif s.aggregate == "count":
                s.value = s.value.map(es_column_map)
                new_select["count_"+literal_field(s.value.var)] += [s]
            else:
                s.value = s.value.map(es_column_map)
                new_select[literal_field(s.value.var)] += [s]
        else:
            formula.append(s)

    for canonical_name, many in new_select.items():
        representative = many[0]
        if representative.value.var == ".":
            Log.error("do not know how to handle")
        else:
            field_name = representative.value.var

        # canonical_name=literal_field(many[0].name)
        for s in many:
            if s.aggregate == "count":
                es_query.aggs[literal_field(canonical_name)].value_count.field = field_name
                s.pull = literal_field(canonical_name) + ".value"
            elif s.aggregate == "median":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = key + ".values.50\.0"
            elif s.aggregate == "percentile":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                if isinstance(s.percentile, basestring) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = key + ".values." + literal_field(unicode(percent))
            elif s.aggregate == "cardinality":
                # ES USES DIFFERENT METHOD FOR CARDINALITY
                key = literal_field(canonical_name + " cardinality")

                es_query.aggs[key].cardinality.field = field_name
                s.pull = key + ".value"
            elif s.aggregate == "stats":
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = field_name

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + " percentile")
                es_query.aggs[median_name].percentiles.field = field_name
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = {
                    "count": stats_name + ".count",
                    "sum": stats_name + ".sum",
                    "min": stats_name + ".min",
                    "max": stats_name + ".max",
                    "avg": stats_name + ".avg",
                    "sos": stats_name + ".sum_of_squares",
                    "std": stats_name + ".std_deviation",
                    "var": stats_name + ".variance",
                    "median": median_name + ".values.50\.0"
                }
            elif s.aggregate == "union":
                # USE TERMS AGGREGATE TO SIMULATE union
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].terms.field = field_name
                es_query.aggs[stats_name].terms.size = Math.min(s.limit, MAX_LIMIT)
                s.pull = stats_name + ".buckets.key"
            else:
                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = field_name
                s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate]

    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)
        abs_value = s.value.map(es_column_map)

        if s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby()
            s.pull = literal_field(canonical_name) + ".value"
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = key + ".values.50\.0"
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = key + ".values." + literal_field(unicode(percent))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = abs_value.to_ruby()
            s.pull = key + ".value"
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = abs_value.to_ruby()

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = abs_value.to_ruby()
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = {
                "count": stats_name + ".count",
                "sum": stats_name + ".sum",
                "min": stats_name + ".min",
                "max": stats_name + ".max",
                "avg": stats_name + ".avg",
                "sos": stats_name + ".sum_of_squares",
                "std": stats_name + ".std_deviation",
                "var": stats_name + ".variance",
                "median": median_name + ".values.50\.0"
            }
        elif s.aggregate=="union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = abs_value.to_ruby()
            s.pull = stats_name + ".buckets.key"
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
            es_query.aggs[canonical_name].extended_stats.script = abs_value.to_ruby()

    decoders = get_decoders_by_depth(query)
    start = 0

    vars_ = query.where.vars()

    #<TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum, map_=es_column_map)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            #TODO: INCLUDE FILTERS ON EDGES
            filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter())
            es_query = Dict(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {
                    "nested": {
                        "path": frum.query_path
                    }
                },
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    for d in decoders[0]:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if split_where[0]:
        #TODO: INCLUDE FILTERS ON EDGES
        filter = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter())
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", e)
Example #44
0
def es_deepop(es, query):
    columns = query.frum.get_columns(query.frum.name)
    query_path = query.frum.query_path
    columns = UniqueIndex(keys=["name"],
                          data=sorted(
                              columns,
                              lambda a, b: cmp(len(listwrap(b.nested_path)),
                                               len(listwrap(a.nested_path)))),
                          fail_on_dup=False)
    map_to_es_columns = {c.name: c.es_column for c in columns}
    map_to_local = {
        c.name: "_inner" + c.es_column[len(listwrap(c.nested_path)[0]):]
        if c.nested_path else "fields." + literal_field(c.es_column)
        for c in columns
    }
    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es14.util.es_query_template(query.frum.name)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, query.frum,
                                       map_to_es_columns)
    for i, f in enumerate(es_filters):
        # PROBLEM IS {"match_all": {}} DOES NOT SURVIVE set_default()
        for k, v in unwrap(
                simplify_esfilter(AndOp("and",
                                        wheres[i]).to_esfilter())).items():
            f[k] = v

    if not wheres[1]:
        more_filter = {
            "and": [
                simplify_esfilter(AndOp("and", wheres[0]).to_esfilter()), {
                    "not": {
                        "nested": {
                            "path": query_path,
                            "filter": {
                                "match_all": {}
                            }
                        }
                    }
                }
            ]
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = []

    is_list = isinstance(query.select, list)
    new_select = DictList()

    def get_pull(column):
        if column.nested_path:
            return "_inner" + column.es_column[
                len(listwrap(column.nested_path)[0]):]
        else:
            return "fields." + literal_field(column.es_column)

    i = 0
    for s in listwrap(query.select):
        if isinstance(s.value, LeavesOp):
            if isinstance(s.value.term, Variable):
                if s.value.term.var == ".":
                    # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
                    for c in columns:
                        if c.relative and c.type not in ["nested", "object"]:
                            if not c.nested_path:
                                es_query.fields += [c.es_column]
                            new_select.append({
                                "name": c.name,
                                "pull": get_pull(c),
                                "nested_path": listwrap(c.nested_path)[0],
                                "put": {"name": literal_field(c.name), "index": i, "child": "."}
                            })
                            i += 1

                    # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
                    col_names = [c.name for c in columns if c.relative]
                    for n in new_select:
                        if n.name.startswith("..") and n.name.lstrip(
                                ".") not in col_names:
                            n.name = n.put.name = n.name.lstrip(".")
                else:
                    column = s.value.term.var + "."
                    prefix = len(column)
                    for c in columns:
                        if c.name.startswith(column) and c.type not in [
                                "object", "nested"
                        ]:
                            pull = get_pull(c)
                            if len(listwrap(c.nested_path)) == 0:
                                es_query.fields += [c.es_column]

                            new_select.append({
                                "name": s.name + "." + c.name[prefix:],
                                "pull": pull,
                                "nested_path": listwrap(c.nested_path)[0],
                                "put": {"name": s.name + "." + literal_field(c.name[prefix:]), "index": i, "child": "."}
                            })
                            i += 1
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                for c in columns:
                    if c.relative and c.type not in ["nested", "object"]:
                        if not c.nested_path:
                            es_query.fields += [c.es_column]
                        new_select.append({
                            "name": c.name,
                            "pull": get_pull(c),
                            "nested_path": listwrap(c.nested_path)[0],
                            "put": {"name": ".", "index": i, "child": c.es_column}
                        })
                i += 1
            elif s.value.var == "_id":
                new_select.append({
                    "name": s.name,
                    "value": s.value.var,
                    "pull": "_id",
                    "put": {
                        "name": s.name,
                        "index": i,
                        "child": "."
                    }
                })
                i += 1
            else:
                column = columns[(s.value.var, )]
                parent = column.es_column + "."
                prefix = len(parent)
                net_columns = [
                    c for c in columns if c.es_column.startswith(parent)
                    and c.type not in ["object", "nested"]
                ]
                if not net_columns:
                    pull = get_pull(column)
                    if not column.nested_path:
                        es_query.fields += [column.es_column]
                    new_select.append({
                        "name": s.name,
                        "pull": pull,
                        "nested_path": listwrap(column.nested_path)[0],
                        "put": {"name": s.name, "index": i, "child": "."}
                    })
                else:
                    done = set()
                    for n in net_columns:
                        # THE COLUMNS CAN HAVE DUPLICATE REFERENCES TO THE SAME ES_COLUMN
                        if n.es_column in done:
                            continue
                        done.add(n.es_column)

                        pull = get_pull(n)
                        if not n.nested_path:
                            es_query.fields += [n.es_column]
                        new_select.append({
                            "name": s.name,
                            "pull": pull,
                            "nested_path": listwrap(n.nested_path)[0],
                            "put": {"name": s.name, "index": i, "child": n.es_column[prefix:]}
                        })
                i += 1
        else:
            expr = s.value
            for v in expr.vars():
                for n in columns:
                    if n.name == v:
                        if not n.nested_path:
                            es_query.fields += [n.es_column]

            pull = EXPRESSION_PREFIX + s.name
            post_expressions[pull] = compile_expression(
                expr.map(map_to_local).to_python())

            new_select.append({
                "name": s.name if is_list else ".",
                "pull": pull,
                "value": expr.to_dict(),
                "put": {
                    "name": s.name,
                    "index": i,
                    "child": "."
                }
            })
            i += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []

    def get_more(please_stop):
        more.append(
            es09.util.post(es, Dict(filter=more_filter,
                                    fields=es_query.fields), query.limit))

    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    # EACH HIT IS RETURNED MULTIPLE TIMES, ONCE FOR EACH INNER HIT, WITH THE INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t

    #</COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        Log.error("problem formatting", e)
Example #45
0
    def _update_cardinality(self, c):
        """
        QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
        """
        if c.type in ["object", "nested"]:
            Log.error("not supported")
        try:
            if c.table == "meta.columns":
                with self.meta.columns.locker:
                    partitions = jx.sort([
                        g[c.es_column]
                        for g, _ in jx.groupby(self.meta.columns, c.es_column)
                        if g[c.es_column] != None
                    ])
                    self.meta.columns.update({
                        "set": {
                            "partitions": partitions,
                            "count": len(self.meta.columns),
                            "cardinality": len(partitions),
                            "last_updated": Date.now()
                        },
                        "where": {
                            "eq": {
                                "table": c.table,
                                "es_column": c.es_column
                            }
                        }
                    })
                return
            if c.table == "meta.tables":
                with self.meta.columns.locker:
                    partitions = jx.sort([
                        g[c.es_column]
                        for g, _ in jx.groupby(self.meta.tables, c.es_column)
                        if g[c.es_column] != None
                    ])
                    self.meta.columns.update({
                        "set": {
                            "partitions": partitions,
                            "count": len(self.meta.tables),
                            "cardinality": len(partitions),
                            "last_updated": Date.now()
                        },
                        "where": {
                            "eq": {
                                "table": c.table,
                                "name": c.name
                            }
                        }
                    })
                return

            es_index = c.table.split(".")[0]
            result = self.default_es.post("/" + es_index + "/_search",
                                          data={
                                              "aggs": {
                                                  c.name: _counting_query(c)
                                              },
                                              "size": 0
                                          })
            r = result.aggregations.values()[0]
            count = result.hits.total
            cardinality = coalesce(r.value, r._nested.value,
                                   0 if r.doc_count == 0 else None)
            if cardinality == None:
                Log.error("logic error")

            query = Dict(size=0)
            if cardinality > 1000 or (count >= 30 and cardinality == count) or (count >= 1000 and cardinality / count > 0.99):
                Log.note("{{table}}.{{field}} has {{num}} parts",
                         table=c.table,
                         field=c.es_column,
                         num=cardinality)
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "set": {
                            "count": count,
                            "cardinality": cardinality,
                            "last_updated": Date.now()
                        },
                        "clear": ["partitions"],
                        "where": {
                            "eq": {
                                "es_index": c.es_index,
                                "es_column": c.es_column
                            }
                        }
                    })
                return
            elif c.type in _elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
                Log.note("{{field}} has {{num}} parts",
                         field=c.name,
                         num=cardinality)
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "set": {
                            "count": count,
                            "cardinality": cardinality,
                            "last_updated": Date.now()
                        },
                        "clear": ["partitions"],
                        "where": {
                            "eq": {
                                "es_index": c.es_index,
                                "es_column": c.es_column
                            }
                        }
                    })
                return
            elif c.nested_path:
                query.aggs[literal_field(c.name)] = {
                    "nested": {
                        "path": listwrap(c.nested_path)[0]
                    },
                    "aggs": {
                        "_nested": {
                            "terms": {
                                "field": c.es_column,
                                "size": 0
                            }
                        }
                    }
                }
            else:
                query.aggs[literal_field(c.name)] = {
                    "terms": {
                        "field": c.es_column,
                        "size": 0
                    }
                }

            result = self.default_es.post("/" + es_index + "/_search",
                                          data=query)

            aggs = result.aggregations.values()[0]
            if aggs._nested:
                parts = jx.sort(aggs._nested.buckets.key)
            else:
                parts = jx.sort(aggs.buckets.key)

            Log.note("{{field}} has {{parts}}", field=c.name, parts=parts)
            with self.meta.columns.locker:
                self.meta.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "partitions": parts,
                        "last_updated": Date.now()
                    },
                    "where": {
                        "eq": {
                            "es_index": c.es_index,
                            "es_column": c.es_column
                        }
                    }
                })
        except Exception, e:
            if "IndexMissingException" in e and c.table.startswith(
                    TEST_TABLE_PREFIX):
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "set": {
                            "count": 0,
                            "cardinality": 0,
                            "last_updated": Date.now()
                        },
                        "clear": ["partitions"],
                        "where": {
                            "eq": {
                                "es_index": c.es_index,
                                "es_column": c.es_column
                            }
                        }
                    })
            else:
                self.meta.columns.update({
                    "set": {
                        "last_updated": Date.now()
                    },
                    "clear": [
                        "count",
                        "cardinality",
                        "partitions",
                    ],
                    "where": {
                        "eq": {
                            "table": c.table,
                            "es_column": c.es_column
                        }
                    }
                })
                Log.warning(
                    "Could not get {{col.table}}.{{col.es_column}} info",
                    col=c,
                    cause=e)
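
The branch above decides whether a column has too many distinct values to be worth listing as partitions. Restated as a stand-alone predicate, with the thresholds copied from the code; float() is added here only so the ratio is a true division:

def too_many_parts(count, cardinality):
    # skip collecting partitions when the column is (nearly) unique
    # or simply has more than 1000 distinct values
    return (
        cardinality > 1000
        or (count >= 30 and cardinality == count)
        or (count >= 1000 and cardinality / float(count) > 0.99)
    )

print(too_many_parts(50, 50))      # True  - every value is distinct
print(too_many_parts(2000, 1500))  # True  - more than 1000 distinct values
print(too_many_parts(5000, 200))   # False - partitions are worth keeping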
Example #46
0
def es_deepop(es, query):
    columns = query.frum.get_columns(query.frum.name)
    query_path = query.frum.query_path
    columns = UniqueIndex(keys=["name"], data=sorted(columns, lambda a, b: cmp(len(listwrap(b.nested_path)), len(listwrap(a.nested_path)))), fail_on_dup=False)
    map_to_es_columns = {c.name: c.es_column for c in columns}
    map_to_local = {
        c.name: "_inner" + c.es_column[len(listwrap(c.nested_path)[0]):] if c.nested_path else "fields." + literal_field(c.es_column)
        for c in columns
    }
    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es14.util.es_query_template(query.frum.name)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, query.frum, map_to_es_columns)
    for i, f in enumerate(es_filters):
        # PROBLEM IS {"match_all": {}} DOES NOT SURVIVE set_default()
        for k, v in unwrap(simplify_esfilter(AndOp("and", wheres[i]).to_esfilter())).items():
            f[k] = v


    if not wheres[1]:
        more_filter = {
            "and": [
                simplify_esfilter(AndOp("and", wheres[0]).to_esfilter()),
                {"not": {
                    "nested": {
                        "path": query_path,
                        "filter": {
                            "match_all": {}
                        }
                    }
                }}
            ]
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = []

    is_list = isinstance(query.select, list)
    new_select = DictList()

    def get_pull(column):
        if column.nested_path:
            return "_inner" + column.es_column[len(listwrap(column.nested_path)[0]):]
        else:
            return "fields." + literal_field(column.es_column)

    i = 0
    for s in listwrap(query.select):
        if isinstance(s.value, LeavesOp):
            if isinstance(s.value.term, Variable):
                if s.value.term.var == ".":
                    # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
                    for c in columns:
                        if c.relative and c.type not in ["nested", "object"]:
                            if not c.nested_path:
                                es_query.fields += [c.es_column]
                            new_select.append({
                                "name": c.name,
                                "pull": get_pull(c),
                                "nested_path": listwrap(c.nested_path)[0],
                                "put": {"name": literal_field(c.name), "index": i, "child": "."}
                            })
                            i += 1

                    # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
                    col_names = [c.name for c in columns if c.relative]
                    for n in new_select:
                        if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                            n.name = n.put.name = n.name.lstrip(".")
                else:
                    column = s.value.term.var + "."
                    prefix = len(column)
                    for c in columns:
                        if c.name.startswith(column) and c.type not in ["object", "nested"]:
                            pull = get_pull(c)
                            if len(listwrap(c.nested_path)) == 0:
                                es_query.fields += [c.es_column]

                            new_select.append({
                                "name": s.name + "." + c.name[prefix:],
                                "pull": pull,
                                "nested_path": listwrap(c.nested_path)[0],
                                "put": {"name": s.name + "." + literal_field(c.name[prefix:]), "index": i, "child": "."}
                            })
                            i += 1
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                for c in columns:
                    if c.relative and c.type not in ["nested", "object"]:
                        if not c.nested_path:
                            es_query.fields += [c.es_column]
                        new_select.append({
                            "name": c.name,
                            "pull": get_pull(c),
                            "nested_path": listwrap(c.nested_path)[0],
                            "put": {"name": ".", "index": i, "child": c.es_column}
                        })
                i += 1
            elif s.value.var == "_id":
                new_select.append({
                    "name": s.name,
                    "value": s.value.var,
                    "pull": "_id",
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            else:
                column = columns[(s.value.var,)]
                parent = column.es_column+"."
                prefix = len(parent)
                net_columns = [c for c in columns if c.es_column.startswith(parent) and c.type not in ["object", "nested"]]
                if not net_columns:
                    pull = get_pull(column)
                    if not column.nested_path:
                        es_query.fields += [column.es_column]
                    new_select.append({
                        "name": s.name,
                        "pull": pull,
                        "nested_path": listwrap(column.nested_path)[0],
                        "put": {"name": s.name, "index": i, "child": "."}
                    })
                else:
                    done = set()
                    for n in net_columns:
                        # THE COLUMNS CAN HAVE DUPLICATE REFERENCES TO THE SAME ES_COLUMN
                        if n.es_column in done:
                            continue
                        done.add(n.es_column)

                        pull = get_pull(n)
                        if not n.nested_path:
                            es_query.fields += [n.es_column]
                        new_select.append({
                            "name": s.name,
                            "pull": pull,
                            "nested_path": listwrap(n.nested_path)[0],
                            "put": {"name": s.name, "index": i, "child": n.es_column[prefix:]}
                        })
                i += 1
        else:
            expr = s.value
            for v in expr.vars():
                for n in columns:
                    if n.name == v:
                        if not n.nested_path:
                            es_query.fields += [n.es_column]

            pull = EXPRESSION_PREFIX + s.name
            post_expressions[pull] = compile_expression(expr.map(map_to_local).to_python())

            new_select.append({
                "name": s.name if is_list else ".",
                "pull": pull,
                "value": expr.to_dict(),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []
    def get_more(please_stop):
        more.append(es09.util.post(
            es,
            Dict(
                filter=more_filter,
                fields=es_query.fields
            ),
            query.limit
        ))
    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    # EACH HIT IS RETURNED MULTIPLE TIMES, ONCE FOR EACH INNER HIT, WITH THE INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    #</COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        Log.error("problem formatting", e)
Example #47
0
def es_terms_stats(esq, mvel, query):
    select = listwrap(query.select)
    facetEdges = []  # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = DictList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error(
                    "There is more than one open-ended edge: this can not be handled"
                )
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_keyword(e.value))
                    or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions)
                           for f in facetEdges) * len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {
                "aggregate": "count"
            },
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note(
            "{{theory_count}} theoretical combinations, {{real_count}} actual combos found",
            real_count=len(esFacets),
            theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [
                len(e.domain.partitions) + (1 if e.allowNulls else 0)
                for e in query.edges
            ]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, DictList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error(
            "not implemented yet"
        )  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = DictList()
            constants = DictList()
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({
                    "name": fedge.domain.name,
                    "value": parts[f]
                })
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_keyword(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_keyword(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter(
                    {"and": condition})

    data = es09.util.post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = DictList()
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if not map.get(stats):
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [
        len(e.domain.partitions) + (1 if e.allowNulls else 0)
        for e in query.edges
    ]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (
                    special.dataIndex, ) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
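
Each facet above is named by joining the select name with the facet-edge part indexes, and the fill-cube loop later splits that name back apart. A round-trip sketch of the encoding, which assumes (as the code does) that the select name contains no comma:

def encode_facet_name(select_name, part_indexes):
    # "<select name>,<edge0 part index>,<edge1 part index>,..."
    return ",".join([select_name] + [str(i) for i in part_indexes])

def decode_facet_name(facet_name):
    pieces = facet_name.split(",")
    return pieces[0], tuple(int(c) for c in pieces[1:])

name = encode_facet_name("duration", [2, 0])
print(name)                       # duration,2,0
print(decode_facet_name(name))    # ('duration', (2, 0))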
Example #48
0
 def get_pull(column):
     if column.nested_path:
         return "_inner" + column.es_column[
             len(listwrap(column.nested_path)[0]):]
     else:
         return "fields." + literal_field(column.es_column)
Example #49
0
 def __init__(self, edge, query):
     self.start = None
     self.edge = edge
     self.name = literal_field(self.edge.name)
Example #50
0
def _normalize_select(select, frum, schema=None):
    """
    :param select: ONE SELECT COLUMN
    :param frum: TABLE TO get_columns()
    :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS
    :return: AN ARRAY OF SELECT COLUMNS
    """
    if not _Column:
        _late_import()

    if isinstance(select, basestring):
        canonical = select = Dict(value=select)
    else:
        select = wrap(select)
        canonical = select.copy()

    canonical.aggregate = coalesce(canonical_aggregates[select.aggregate].name, select.aggregate, "none")
    canonical.default = coalesce(select.default, canonical_aggregates[canonical.aggregate].default)

    if hasattr(frum, "_normalize_select"):
        return frum._normalize_select(canonical)

    output = []
    if not select.value or select.value == ".":
        output.extend(
            [set_default({"name": c.name, "value": jx_expression(c.name)}, canonical) for c in frum.get_leaves()]
        )
    elif isinstance(select.value, basestring):
        if select.value.endswith(".*"):
            base_name = select.value[:-2]
            canonical.name = coalesce(select.name, base_name, select.aggregate)
            value = jx_expression(select.value[:-2])
            if not isinstance(value, Variable):
                Log.error("`*` over general expression not supported yet")
                output.append(
                    [
                        set_default(
                            {
                                "name": base_name,
                                "value": LeavesOp("leaves", value),
                                "format": "dict",  # MARKUP FOR DECODING
                            },
                            canonical,
                        )
                        for c in frum.get_columns()
                        if c.type not in ["object", "nested"]
                    ]
                )
            else:
                output.extend(
                    [
                        set_default(
                            {
                                "name": base_name + "." + literal_field(c.name[len(base_name) + 1 :]),
                                "value": jx_expression(c.name),
                            },
                            canonical,
                        )
                        for c in frum.get_leaves()
                        if c.name.startswith(base_name + ".")
                    ]
                )
        else:
            canonical.name = coalesce(select.name, select.value, select.aggregate)
            canonical.value = jx_expression(select.value)
            output.append(canonical)

    output = wrap(output)
    if any(n == None for n in output.name):
        Log.error("expecting select to have a name: {{select}}", select=select)
    return output
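
The ".*" branch above fans one select out into one column per leaf, escaping the trailing part of each name so it stays a single key. A data-only sketch of that expansion, with escape_dots standing in for literal_field and invented leaf names:

def escape_dots(name):
    return name.replace(".", "\\.")    # stand-in for literal_field()

def expand_star(base_name, leaf_names):
    # one select column per leaf under base_name, mirroring the ".*" branch above
    return [
        {"name": base_name + "." + escape_dots(leaf[len(base_name) + 1:]), "value": leaf}
        for leaf in leaf_names
        if leaf.startswith(base_name + ".")
    ]

print(expand_star("run", ["run.suite", "run.machine.os", "build.date"]))
# [{'name': 'run.suite', 'value': 'run.suite'},
#  {'name': 'run.machine\\.os', 'value': 'run.machine.os'}]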
Example #51
0
 def __init__(self, edge, query, limit):
     self.start = None
     self.edge = edge
     self.name = literal_field(self.edge.name)
     self.query = query
     self.limit = limit
Example #52
0
def _normalize(esfilter):
    """
    TODO: DO NOT USE Dicts, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS
    """
    if esfilter is TRUE_FILTER or esfilter is FALSE_FILTER or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    while isDiff:
        isDiff = False

        if esfilter["and"] != None:
            terms = esfilter["and"]
            # MERGE range FILTER WITH SAME FIELD
            for (i0, t0), (i1, t1) in itertools.product(enumerate(terms), enumerate(terms)):
                if i0 >= i1:
                    continue  # SAME, IGNORE
                try:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = True
                except Exception, e:
                    pass


            output = []
            for a in terms:
                if isinstance(a, (list, set)):
                    from pyLibrary.debugs.logs import Log
                    Log.error("and clause is not allowed a list inside a list")
                a_ = normalize_esfilter(a)
                if a_ is not a:
                    isDiff = True
                a = a_
                if a == TRUE_FILTER:
                    isDiff = True
                    continue
                if a == FALSE_FILTER:
                    return FALSE_FILTER
                if a.get("and"):
                    isDiff = True
                    a.isNormal = None
                    output.extend(a.get("and"))
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return TRUE_FILTER
            elif len(output) == 1:
                # output[0].isNormal = True
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"and": output})
            continue

        if esfilter["or"] != None:
            output = []
            for a in esfilter["or"]:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_

                if a == TRUE_FILTER:
                    return TRUE_FILTER
                if a == FALSE_FILTER:
                    isDiff = True
                    continue
                if a.get("or"):
                    a.isNormal = None
                    isDiff = True
                    output.extend(a["or"])
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return FALSE_FILTER
            elif len(output) == 1:
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"or": output})
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                return TRUE_FILTER

        if esfilter.terms != None:
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            return {
                                "or": [
                                    {"missing": {"field": k}},
                                    {"terms": {k: rest}}
                                ],
                                "isNormal": True
                            }
                        else:
                            return {
                                "missing": {"field": k},
                                "isNormal": True
                            }
                    else:
                        esfilter.isNormal = True
                        return esfilter
            return FALSE_FILTER

        if esfilter["not"] != None:
            _sub = esfilter["not"]
            sub = _normalize(_sub)
            if sub is FALSE_FILTER:
                return TRUE_FILTER
            elif sub is TRUE_FILTER:
                return FALSE_FILTER
            elif sub is not _sub:
                sub.isNormal = None
                return wrap({"not": sub, "isNormal": True})
            else:
                sub.isNormal = None
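
The "and" branch above repeatedly pulls the children of nested {"and": [...]} clauses up into the parent list. A one-level, dict-only illustration of that collapse; the real _normalize also folds TRUE/FALSE filters and handles or/not/term/terms:

def flatten_and(esfilter):
    # pull children of a nested {"and": [...]} up into the parent "and" list
    output = []
    for clause in esfilter.get("and", []):
        if isinstance(clause, dict) and "and" in clause:
            output.extend(clause["and"])
        else:
            output.append(clause)
    return {"and": output}

print(flatten_and({"and": [
    {"term": {"ok": True}},
    {"and": [{"term": {"status": "PASS"}}, {"exists": {"field": "test"}}]}
]}))
# {'and': [{'term': {'ok': True}}, {'term': {'status': 'PASS'}}, {'exists': {'field': 'test'}}]}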
Example #53
0
def _normalize_select(select, frum, schema=None):
    """
    :param select: ONE SELECT COLUMN
    :param frum: TABLE TO get_columns()
    :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS
    :return: AN ARRAY OF SELECT COLUMNS
    """
    if not _Column:
        _late_import()

    if isinstance(select, basestring):
        canonical = select = Dict(value=select)
    else:
        select = wrap(select)
        canonical = select.copy()

    canonical.aggregate = coalesce(canonical_aggregates[select.aggregate].name,
                                   select.aggregate, "none")
    canonical.default = coalesce(
        select.default, canonical_aggregates[canonical.aggregate].default)

    if hasattr(frum, "_normalize_select"):
        return frum._normalize_select(canonical)

    output = []
    if not select.value or select.value == ".":
        output.extend([
            set_default({
                "name": c.name,
                "value": jx_expression(c.name)
            }, canonical) for c in frum.get_leaves()
        ])
    elif isinstance(select.value, basestring):
        if select.value.endswith(".*"):
            base_name = select.value[:-2]
            canonical.name = coalesce(select.name, base_name, select.aggregate)
            value = jx_expression(select.value[:-2])
            if not isinstance(value, Variable):
                Log.error("`*` over general expression not supported yet")
                output.append([
                    set_default(
                        {
                            "name": base_name,
                            "value": LeavesOp("leaves", value),
                            "format": "dict"  # MARKUP FOR DECODING
                        },
                        canonical) for c in frum.get_columns()
                    if c.type not in ["object", "nested"]
                ])
            else:
                output.extend([
                    set_default(
                        {
                            "name": base_name + "." + literal_field(c.name[len(base_name) + 1:]),
                            "value": jx_expression(c.name)
                        }, canonical) for c in frum.get_leaves()
                    if c.name.startswith(base_name + ".")
                ])
        else:
            canonical.name = coalesce(select.name, select.value,
                                      select.aggregate)
            canonical.value = jx_expression(select.value)
            output.append(canonical)

    output = wrap(output)
    if any(n == None for n in output.name):
        Log.error("expecting select to have a name: {{select}}", select=select)
    return output
Example #54
0
 def test_start(self, log):
     if isinstance(log.test, list):
         log.test = " ".join(log.test)
     self.tests[literal_field(log.test)] = Dict(test=log.test,
                                                start_time=log.time)
     self.last_subtest = log.time
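
Test names routinely contain dots, so test_start keys them with literal_field to keep each name a single key in the path-addressable Dict. A self-contained sketch of that intent; escape_dots and split_path are stand-ins, not the pyLibrary implementations:

import re

def escape_dots(name):
    return name.replace(".", "\\.")    # stand-in for literal_field()

def split_path(path):
    # split on dots that are NOT preceded by a backslash, then unescape
    return [p.replace("\\.", ".") for p in re.split(r"(?<!\\)\.", path)]

key = escape_dots("dom/tests/test_window.html")
print(key)               # dom/tests/test_window\.html
print(split_path(key))   # ['dom/tests/test_window.html']  -> one key, not two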
Example #55
0
def extract_rows(es, es_query, query):
    is_list = isinstance(query.select, list)
    select = wrap([s.copy() for s in listwrap(query.select)])
    new_select = DictList()
    columns = query.frum.get_columns()
    leaf_columns = set(c.name for c in columns if c.type not in STRUCT and (not c.nested_path or c.es_column == c.nested_path))
    nested_columns = set(c.name for c in columns if c.nested_path)

    i = 0
    source = "fields"
    for s in select:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(s.value, LeavesOp):
            term = s.value.term
            if isinstance(term, Variable):

                if term.var == ".":
                    es_query.fields = None
                    source = "_source"

                    net_columns = leaf_columns - set(select.name)
                    for n in net_columns:
                        new_select.append({
                            "name": n,
                            "value": Variable(n),
                            "put": {"name": n, "index": i, "child": "."}
                        })
                        i += 1
                else:
                    parent = term.var + "."
                    prefix = len(parent)
                    for c in leaf_columns:
                        if c.startswith(parent):
                            if es_query.fields is not None:
                                es_query.fields.append(c)

                            new_select.append({
                                "name": s.name + "." + c[prefix:],
                                "value": Variable(c),
                                "put": {"name": s.name + "." + c[prefix:], "index": i, "child": "."}
                            })
                            i += 1

        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                es_query.fields = None
                source = "_source"

                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            elif s.value.var == "_id":
                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "pull": "_id",
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            elif s.value.var in nested_columns:
                es_query.fields = None
                source = "_source"

                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            else:
                parent = s.value.var + "."
                prefix = len(parent)
                net_columns = [c for c in leaf_columns if c.startswith(parent)]
                if not net_columns:
                    # LEAF
                    if es_query.fields is not None:
                        es_query.fields.append(s.value.var)
                    new_select.append({
                        "name": s.name,
                        "value": s.value,
                        "put": {"name": s.name, "index": i, "child": "."}
                    })
                else:
                    # LEAVES OF OBJECT
                    for n in net_columns:
                        if es_query.fields is not None:
                            es_query.fields.append(n)
                        new_select.append({
                            "name": s.name,
                            "value": Variable(n),
                            "put": {"name": s.name, "index": i, "child": n[prefix:]}
                        })
                i += 1
        else:
            es_query.script_fields[literal_field(s.name)] = {"script": s.value.to_ruby()}
            new_select.append({
                "name": s.name,
                "pull": "fields." + literal_field(s.name),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    for n in new_select:
        if n.pull:
            continue
        if source == "_source":
            n.pull = join_field(["_source"] + split_field(n.value.var))
        elif isinstance(n.value, Variable):
            n.pull = "fields." + literal_field(n.value.var)
        else:
            Log.error("Do not know what to do")

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception, e:
        Log.error("problem formatting", e)
Example #56
0
    def _update_cardinality(self, c):
        """
        QUERY ES TO FIND CARDINALITY AND PARTITIONS FOR A SIMPLE COLUMN
        """
        if c.type in STRUCT:
            Log.error("not supported")
        try:
            if c.table == "meta.columns":
                with self.meta.columns.locker:
                    partitions = jx.sort([g[c.es_column] for g, _ in jx.groupby(self.meta.columns, c.es_column) if g[c.es_column] != None])
                    self.meta.columns.update({
                        "set": {
                            "partitions": partitions,
                            "count": len(self.meta.columns),
                            "cardinality": len(partitions),
                            "last_updated": Date.now()
                        },
                        "where": {"eq": {"table": c.table, "es_column": c.es_column}}
                    })
                return
            if c.table == "meta.tables":
                with self.meta.columns.locker:
                    partitions = jx.sort([g[c.es_column] for g, _ in jx.groupby(self.meta.tables, c.es_column) if g[c.es_column] != None])
                    self.meta.columns.update({
                        "set": {
                            "partitions": partitions,
                            "count": len(self.meta.tables),
                            "cardinality": len(partitions),
                            "last_updated": Date.now()
                        },
                        "where": {"eq": {"table": c.table, "name": c.name}}
                    })
                return

            es_index = c.table.split(".")[0]
            result = self.default_es.post("/" + es_index + "/_search", data={
                "aggs": {c.name: _counting_query(c)},
                "size": 0
            })
            r = result.aggregations.values()[0]
            count = result.hits.total
            cardinality = coalesce(r.value, r._nested.value, 0 if r.doc_count==0 else None)
            if cardinality == None:
                Log.error("logic error")

            query = Dict(size=0)
            if cardinality > 1000 or (count >= 30 and cardinality == count) or (count >= 1000 and cardinality / count > 0.99):
                Log.note("{{table}}.{{field}} has {{num}} parts", table=c.table, field=c.es_column, num=cardinality)
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "set": {
                            "count": count,
                            "cardinality": cardinality,
                            "last_updated": Date.now()
                        },
                        "clear": ["partitions"],
                        "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
                    })
                return
            elif c.type in _elasticsearch.ES_NUMERIC_TYPES and cardinality > 30:
                Log.note("{{field}} has {{num}} parts", field=c.name, num=cardinality)
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "set": {
                            "count": count,
                            "cardinality": cardinality,
                            "last_updated": Date.now()
                        },
                        "clear": ["partitions"],
                        "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
                    })
                return
            elif c.nested_path:
                query.aggs[literal_field(c.name)] = {
                    "nested": {"path": listwrap(c.nested_path)[0]},
                    "aggs": {"_nested": {"terms": {"field": c.es_column, "size": 0}}}
                }
            else:
                query.aggs[literal_field(c.name)] = {"terms": {"field": c.es_column, "size": 0}}

            result = self.default_es.post("/" + es_index + "/_search", data=query)

            aggs = result.aggregations.values()[0]
            if aggs._nested:
                parts = jx.sort(aggs._nested.buckets.key)
            else:
                parts = jx.sort(aggs.buckets.key)

            Log.note("{{field}} has {{parts}}", field=c.name, parts=parts)
            with self.meta.columns.locker:
                self.meta.columns.update({
                    "set": {
                        "count": count,
                        "cardinality": cardinality,
                        "partitions": parts,
                        "last_updated": Date.now()
                    },
                    "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
                })
        except Exception, e:
            if "IndexMissingException" in e and c.table.startswith(TEST_TABLE_PREFIX):
                with self.meta.columns.locker:
                    self.meta.columns.update({
                        "set": {
                            "count": 0,
                            "cardinality": 0,
                            "last_updated": Date.now()
                        },
                        "clear":[
                            "partitions"
                        ],
                        "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
                    })
            else:
                self.meta.columns.update({
                    "set": {
                        "last_updated": Date.now()
                    },
                    "clear": [
                        "count",
                        "cardinality",
                        "partitions",
                    ],
                    "where": {"eq": {"table": c.table, "es_column": c.es_column}}
                })
                Log.warning("Could not get {{col.table}}.{{col.es_column}} info", col=c, cause=e)
Example #57
def get_pull(column):
    if len(column.nested_path) != 1:
        return "_inner" + column.es_column[len(column.nested_path[0]):]
    else:
        return "fields." + literal_field(column.es_column)
Example #58
def get_pull(column):
    if column.nested_path:
        return "_inner" + column.es_column[len(listwrap(column.nested_path)[0]):]
    else:
        return "fields." + literal_field(column.es_column)
Example #59
def extract_rows(es, es_query, query):
    is_list = isinstance(query.select, list)
    select = wrap([s.copy() for s in listwrap(query.select)])
    new_select = DictList()
    columns = query.frum.get_columns()
    leaf_columns = set(c.name for c in columns if c.type not in ["object", "nested"] and (not c.nested_path or c.es_column == c.nested_path))
    nested_columns = set(c.name for c in columns if c.nested_path)

    i = 0
    source = "fields"
    for s in select:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(s.value, LeavesOp):
            if isinstance(s.value.term, Variable):
                if s.value.term.var == ".":
                    es_query.fields = None
                    source = "_source"

                    net_columns = leaf_columns - set(select.name)
                    for n in net_columns:
                        new_select.append({
                            "name": n,
                            "value": n,
                            "put": {"name": n, "index": i, "child": "."}
                        })
                        i += 1
                else:
                    parent = s.value.term.var + "."
                    prefix = len(parent)
                    for c in leaf_columns:
                        if c.startswith(parent):
                            if es_query.fields is not None:
                                es_query.fields.append(c)

                            new_select.append({
                                "name": s.name + "." + c[prefix:],
                                "value": c,
                                "put": {"name": s.name + "." + c[prefix:], "index": i, "child": "."}
                            })
                            i += 1

        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                es_query.fields = None
                source = "_source"

                new_select.append({
                    "name": s.name,
                    "value": s.value.var,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            elif s.value.var == "_id":
                new_select.append({
                    "name": s.name,
                    "value": s.value.var,
                    "pull": "_id",
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            elif s.value.var in nested_columns:
                es_query.fields = None
                source = "_source"

                new_select.append({
                    "name": s.name,
                    "value": s.value,
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            else:
                parent = s.value.var + "."
                prefix = len(parent)
                net_columns = [c for c in leaf_columns if c.startswith(parent)]
                if not net_columns:
                    # LEAF
                    if es_query.fields is not None:
                        es_query.fields.append(s.value.var)
                    new_select.append({
                        "name": s.name,
                        "value": s.value,
                        "put": {"name": s.name, "index": i, "child": "."}
                    })
                else:
                    # LEAVES OF OBJECT
                    for n in net_columns:
                        if es_query.fields is not None:
                            es_query.fields.append(n)
                        new_select.append({
                            "name": s.name,
                            "value": n,
                            "put": {"name": s.name, "index": i, "child": n[prefix:]}
                        })
                i += 1
        else:
            es_query.script_fields[literal_field(s.name)] = {"script": s.value.to_ruby()}
            new_select.append({
                "name": s.name,
                "pull": "fields." + literal_field(s.name),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

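    # ANY RECORD STILL MISSING A pull PATH GETS A DEFAULT, DEPENDING ON WHETHER WE READ "fields" OR "_source"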
    for n in new_select:
        if n.pull:
            continue
        if source == "_source":
            n.pull = join_field(["_source"] + split_field(n.value))
        else:
            n.pull = "fields." + literal_field(n.value)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)