Example #1
def parse(*args):
    try:
        if len(args) == 1:
            a0 = args[0]
            if isinstance(a0, (datetime, date)):
                output = _unix2Date(datetime2unix(a0))
            elif isinstance(a0, Date):
                output = _unix2Date(a0.unix)
            elif isinstance(a0, (int, long, float, Decimal)):
                a0 = float(a0)
                if a0 > 9999999999:    # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
                    output = _unix2Date(a0 / 1000)
                else:
                    output = _unix2Date(a0)
            elif is_text(a0) and len(a0) in [9, 10, 12, 13] and mo_math.is_integer(a0):
                a0 = float(a0)
                if a0 > 9999999999:    # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
                    output = _unix2Date(a0 / 1000)
                else:
                    output = _unix2Date(a0)
            elif is_text(a0):
                output = unicode2Date(a0)
            else:
                output = _unix2Date(datetime2unix(datetime(*args)))
        else:
            if is_text(args[0]):
                output = unicode2Date(*args)
            else:
                output = _unix2Date(datetime2unix(datetime(*args)))

        return output
    except Exception as e:
        from mo_logs import Log

        Log.error("Can not convert {{args}} to Date", args=args, cause=e)
Example #2
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO THEY ARE EASIER TO USE
    """

    if sort == None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if is_text(s):
            output.append({"value": jx_expression(s), "sort": 1})
        elif is_expression(s):
            output.append({"value": s, "sort": 1})
        elif mo_math.is_integer(s):
            output.append({"value": jx_expression({"offset": s}), "sort": 1})
        elif not s.sort and not s.value and all(d in sort_direction
                                                for d in s.values()):
            for v, d in s.items():
                output.append({
                    "value": jx_expression(v),
                    "sort": sort_direction[d]
                })
        elif not s.sort and not s.value:
            Log.error("`sort` clause must have a `value` property")
        else:
            output.append({
                "value": jx_expression(coalesce(s.value, s.field)),
                "sort": sort_direction[s.sort]
            })
    return output
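
The normal form produced above is a list of {"value": <expression>, "sort": <direction>} clauses. The following is a simplified, dependency-free sketch of the same branching, using plain dicts and lists in place of FlatList and jx_expression; the direction table is an assumption standing in for the library's sort_direction:

SORT_DIRECTION = {None: 1, 1: 1, "asc": 1, -1: -1, "desc": -1}  # ASSUMED MAPPING

def normalize_sort_sketch(sort):
    clauses = sort if isinstance(sort, list) else [sort]
    output = []
    for s in clauses:
        if isinstance(s, str):
            # BARE FIELD NAME, ASCENDING
            output.append({"value": s, "sort": 1})
        elif isinstance(s, int):
            # COLUMN OFFSET, ASCENDING
            output.append({"value": {"offset": s}, "sort": 1})
        elif isinstance(s, dict) and "value" not in s and "sort" not in s:
            # {field: direction} FORM
            output.extend({"value": f, "sort": SORT_DIRECTION[d]} for f, d in s.items())
        else:
            output.append({
                "value": s.get("value", s.get("field")),
                "sort": SORT_DIRECTION.get(s.get("sort"), 1),
            })
    return output

print(normalize_sort_sketch(["a", {"b": "desc"}, {"value": "c", "sort": -1}]))
# -> [{'value': 'a', 'sort': 1}, {'value': 'b', 'sort': -1}, {'value': 'c', 'sort': -1}]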
Example #3
    def _convert_query(self, query):
        # if not isinstance(query["from"], Container):
        #     Log.error('Expecting from clause to be a Container')
        query = to_data(query)

        output = QueryOp(None)
        output["from"] = self._convert_from(query["from"])

        output.format = query.format

        if query.select:
            output.select = convert_list(self._convert_select, query.select)
        else:
            if query.edges or query.groupby:
                output.select = {
                    "name": "count",
                    "value": ".",
                    "aggregate": "count",
                    "default": 0
                }
            else:
                output.select = {
                    "name": "__all__",
                    "value": "*",
                    "aggregate": "none"
                }

        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = convert_list(self._convert_edge, query.edges)
            output.groupby = None
        elif query.groupby:
            output.edges = None
            output.groupby = convert_list(self._convert_group, query.groupby)
        else:
            output.edges = []
            output.groupby = None

        output.where = self.convert(query.where)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_sort(query.sort)

        output.limit = coalesce(query.limit, DEFAULT_LIMIT)
        if not mo_math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
        # THE from SOURCE IS.
        vars = get_all_vars(
            output, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
        for c in query.columns:
            if c.name in vars and len(c.nested_path) != 1:
                Log.error("This query, with variable {{var_name}} is too deep",
                          var_name=c.name)

        return output
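
The fields read above imply the shape of an incoming query. A hypothetical input, with made-up table and field names, for illustration only:

example_query = {
    "from": "task",                    # RESOLVED BY self._convert_from
    "select": [{"name": "n", "value": "run.duration", "aggregate": "average"}],
    "groupby": ["run.machine"],        # MUTUALLY EXCLUSIVE WITH "edges"
    "where": {"gte": {"run.timestamp": 1500000000}},
    "sort": [{"run.machine": "asc"}],
    "limit": 100,
    "format": "table",
}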
Example #4
    def wrap(query, container, namespace):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if is_op(query, QueryOp) or query == None:
            return query

        query = wrap(query)
        table = container.get_table(query['from'])
        schema = table.schema
        output = QueryOp(
            frum=table,
            format=query.format,
            chunk_size=query.chunk_size,
            destination=query.destination,
        )

        _import_temper_limit()
        output.limit = temper_limit(query.limit, query)

        if query.select or is_many(query.select) or is_data(query.select):
            output.select = _normalize_selects(query.select,
                                               query.frum,
                                               schema=schema)
        else:
            if query.edges or query.groupby:
                output.select = DEFAULT_SELECT
            else:
                output.select = _normalize_selects(".", query.frum)

        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = _normalize_edges(query.edges,
                                            limit=output.limit,
                                            schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby,
                                                limit=output.limit,
                                                schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where({"and": listwrap(query.where)},
                                        schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.sort = _normalize_sort(query.sort)
        if output.limit != None and (not mo_math.is_integer(output.limit)
                                     or output.limit < 0):
            Log.error("Expecting limit >= 0")

        return output
Example #5
    def wrap(query, container, namespace):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if is_op(query, QueryOp) or query == None:
            return query

        query = wrap(query)
        table = container.get_table(query['from'])
        schema = table.schema
        output = QueryOp(frum=table,
                         format=query.format,
                         limit=mo_math.min(
                             MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT)))

        if query.select or isinstance(query.select, (Mapping, list)):
            output.select = _normalize_selects(query.select,
                                               query.frum,
                                               schema=schema)
        else:
            if query.edges or query.groupby:
                output.select = DEFAULT_SELECT
            else:
                output.select = _normalize_selects(".", query.frum)

        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = _normalize_edges(query.edges,
                                            limit=output.limit,
                                            schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby,
                                                limit=output.limit,
                                                schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where({"and": listwrap(query.where)},
                                        schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.having = None
        output.sort = _normalize_sort(query.sort)
        if not mo_math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        return output
Example #6
def scrub_args(args):
    output = {}
    for k, v in list(args.items()):
        vs = []
        for v in listwrap(v):
            if is_integer(v):
                vs.append(int(v))
            elif is_number(v):
                vs.append(float(v))
            else:
                vs.append(v)
        output[k] = unwraplist(vs)
    return wrap(output)
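
A dependency-free sketch of the same idea: coerce numeric-looking values and collapse single-element lists, which the version above does with mo_math and mo_dots (listwrap/unwraplist/wrap). The helper name and the exact coercion rules are illustrative:

def scrub_args_sketch(args):
    output = {}
    for k, v in args.items():
        values = v if isinstance(v, list) else [v]
        coerced = []
        for item in values:
            try:
                num = float(item)
                # KEEP WHOLE NUMBERS AS int, OTHER NUMBERS AS float
                coerced.append(int(num) if num.is_integer() else num)
            except (TypeError, ValueError):
                coerced.append(item)  # NOT A NUMBER; PASS THROUGH
        output[k] = coerced[0] if len(coerced) == 1 else coerced
    return output

print(scrub_args_sketch({"limit": "10", "ratio": ["0.5", "2"], "name": "ok"}))
# -> {'limit': 10, 'ratio': [0.5, 2], 'name': 'ok'}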
Example #7
    def _convert_query(self, query):
        # if not isinstance(query["from"], Container):
        #     Log.error('Expecting from clause to be a Container')
        query = wrap(query)

        output = QueryOp(None)
        output["from"] = self._convert_from(query["from"])

        output.format = query.format

        if query.select:
            output.select = convert_list(self._convert_select, query.select)
        else:
            if query.edges or query.groupby:
                output.select = {"name": "count", "value": ".", "aggregate": "count", "default": 0}
            else:
                output.select = {"name": "__all__", "value": "*", "aggregate": "none"}

        if query.groupby and query.edges:
            Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
        elif query.edges:
            output.edges = convert_list(self._convert_edge, query.edges)
            output.groupby = None
        elif query.groupby:
            output.edges = None
            output.groupby = convert_list(self._convert_group, query.groupby)
        else:
            output.edges = []
            output.groupby = None

        output.where = self.convert(query.where)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_sort(query.sort)

        output.limit = coalesce(query.limit, DEFAULT_LIMIT)
        if not mo_math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
        # THE from SOURCE IS.
        vars = get_all_vars(output, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
        for c in query.columns:
            if c.name in vars and len(c.nested_path) != 1:
                Log.error("This query, with variable {{var_name}} is too deep", var_name=c.name)

        output.having = convert_list(self._convert_having, query.having)

        return output
Example #8
    def wrap(query, container, namespace):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON
        """
        if is_op(query, QueryOp) or query == None:
            return query

        query = wrap(query)
        table = container.get_table(query['from'])
        schema = table.schema
        output = QueryOp(
            frum=table,
            format=query.format,
            limit=mo_math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
        )

        if query.select or isinstance(query.select, (Mapping, list)):
            output.select = _normalize_selects(query.select, query.frum, schema=schema)
        else:
            if query.edges or query.groupby:
                output.select = DEFAULT_SELECT
            else:
                output.select = _normalize_selects(".", query.frum)

        if query.groupby and query.edges:
            Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
        elif query.edges:
            output.edges = _normalize_edges(query.edges, limit=output.limit, schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby, limit=output.limit, schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        output.where = _normalize_where(query.where, schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.having = None
        output.sort = _normalize_sort(query.sort)
        if not mo_math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        return output
Example #9
def normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO THEY ARE EASIER TO USE
    """

    if not sort:
        return Null

    output = FlatList()
    for s in listwrap(sort):
        if is_text(s) or mo_math.is_integer(s):
            output.append({"value": s, "sort": 1})
        elif not s.field and not s.value and s.sort==None:
            #ASSUME {name: sort} FORM
            for n, v in s.items():
                output.append({"value": n, "sort": sort_direction[v]})
        else:
            output.append({"value": coalesce(s.field, s.value), "sort": coalesce(sort_direction[s.sort], 1)})
    return wrap(output)
Example #10
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO THEY ARE EASIER TO USE
    """

    if sort==None:
        return FlatList.EMPTY

    output = FlatList()
    for s in listwrap(sort):
        if is_text(s):
            output.append({"value": jx_expression(s), "sort": 1})
        elif is_expression(s):
            output.append({"value": s, "sort": 1})
        elif mo_math.is_integer(s):
            output.append({"value": jx_expression({"offset": s}), "sort": 1})
        elif not s.sort and not s.value and all(d in sort_direction for d in s.values()):
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        elif not s.sort and not s.value:
            Log.error("`sort` clause must have a `value` property")
        else:
            output.append({"value": jx_expression(coalesce(s.value, s.field)), "sort": sort_direction[s.sort]})
    return output
Example #11
def process(sig_id,
            show=False,
            show_limit=MAX_POINTS,
            show_old=True,
            show_distribution=None):
    if not mo_math.is_integer(sig_id):
        Log.error("expecting integer id")
    sig = first(get_signature(config.database, sig_id))
    data = get_dataum(config.database, sig_id)

    min_date = (Date.today() - 3 * MONTH).unix
    pushes = jx.sort(
        [{
            "value": median(rows.value),
            "runs": rows,
            "push": {
                "time": unwrap(t)["push.time"]
            },
        } for t, rows in jx.groupby(data, "push.time")
         if t["push\\.time"] > min_date],
        "push.time",
    )

    values = pushes.value
    title = "-".join(
        map(
            text,
            [
                sig.id,
                sig.framework,
                sig.suite,
                sig.test,
                sig.platform,
                sig.repository.name,
            ],
        ))
    Log.note("With {{title}}", title=title)

    with Timer("find segments"):
        new_segments, new_diffs = find_segments(values, sig.alert_change_type,
                                                sig.alert_threshold)

    # USE PERFHERDER ALERTS TO IDENTIFY OLD SEGMENTS
    old_segments = tuple(
        sorted(
            set([
                i for i, p in enumerate(pushes) if any(r.alert.id
                                                       for r in p.runs)
            ] + [0, len(pushes)])))
    old_medians = [0] + [
        np.median(values[s:e])
        for s, e in zip(old_segments[:-1], old_segments[1:])
    ]
    old_diffs = np.array(
        [b / a - 1 for a, b in zip(old_medians[:-1], old_medians[1:])] + [0])

    if len(new_segments) == 1:
        dev_status = None
        dev_score = None
        relative_noise = None
    else:
        # MEASURE DEVIANCE (USE THE LAST SEGMENT)
        s, e = new_segments[-2], new_segments[-1]
        last_segment = np.array(values[s:e])
        trimmed_segment = last_segment[np.argsort(last_segment)
                                       [IGNORE_TOP:-IGNORE_TOP]]
        dev_status, dev_score = deviance(trimmed_segment)
        relative_noise = np.std(trimmed_segment) / np.mean(trimmed_segment)
        Log.note(
            "\n\tdeviance = {{deviance}}\n\tnoise={{std}}",
            title=title,
            deviance=(dev_status, dev_score),
            std=relative_noise,
        )

        if show_distribution:
            histogram(last_segment, title=dev_status + "=" + text(dev_score))

    max_extra_diff = None
    max_missing_diff = None
    _is_diff = is_diff(new_segments, old_segments)
    if _is_diff:
        # FOR MISSING POINTS, CALC BIGGEST DIFF
        max_extra_diff = mo_math.MAX(
            abs(d) for s, d in zip(new_segments, new_diffs)
            if all(not (s - TOLLERANCE <= o <= s + TOLLERANCE)
                   for o in old_segments))
        max_missing_diff = mo_math.MAX(
            abs(d) for s, d in zip(old_segments, old_diffs)
            if all(not (s - TOLLERANCE <= n <= s + TOLLERANCE)
                   for n in new_segments))

        Log.alert(
            "Disagree max_extra_diff={{max_extra_diff|round(places=3)}}, max_missing_diff={{max_missing_diff|round(places=3)}}",
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
        )
        Log.note("old={{old}}, new={{new}}",
                 old=old_segments,
                 new=new_segments)
        if show and len(pushes):
            show_old and assign_colors(
                values, old_segments, title="OLD " + title)
            assign_colors(values, new_segments, title="NEW " + title)
    else:
        Log.note("Agree")
        if show and len(pushes):
            show_old and assign_colors(
                values, old_segments, title="OLD " + title)
            assign_colors(values, new_segments, title="NEW " + title)

    summary_table.upsert(
        where={"eq": {
            "id": sig.id
        }},
        doc=Data(
            id=sig.id,
            title=title,
            num_pushes=len(pushes),
            is_diff=_is_diff,
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
            num_new_segments=len(new_segments),
            num_old_segments=len(old_segments),
            relative_noise=relative_noise,
            dev_status=dev_status,
            dev_score=dev_score,
            last_updated=Date.now(),
        ),
    )
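
The agreement test above treats a new segment boundary as "extra" when no old boundary lies within TOLLERANCE of it, and symmetrically for "missing". A standalone illustration of that boundary matching, with made-up indexes and an assumed tolerance value:

TOLERANCE = 3  # THE ORIGINAL CONSTANT IS SPELLED TOLLERANCE; ITS VALUE IS ASSUMED HERE

def unmatched(boundaries, others, tolerance=TOLERANCE):
    # BOUNDARIES WITH NO COUNTERPART WITHIN tolerance IN THE OTHER SEGMENTATION
    return [b for b in boundaries
            if all(not (b - tolerance <= o <= b + tolerance) for o in others)]

new_segments = [0, 40, 90, 120]
old_segments = [0, 42, 120]
print(unmatched(new_segments, old_segments))  # [90]  EXTRA NEW BOUNDARY
print(unmatched(old_segments, new_segments))  # []    NOTHING MISSING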
Example #12
    def __init__(self, var):
        Expression.__init__(self, None)
        if not is_integer(var):
            Log.error("Expecting an integer")
        self.var = var