Example #1
def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Snagged from unittest/case.py, then modified (Aug2014)
    """
    if expected is NULL:
        if test == None:  # pandas dataframes reject any comparison with an exception!
            return
        else:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))

    if expected == None:  # None has no expectations
        return
    if test == expected:
        # shortcut
        return

    if not is_number(expected):
        # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
        if is_list(expected) and len(expected) == 0 and test == None:
            return
        if is_data(expected) and not expected.keys() and test == None:
            return
        if test != expected:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))
        return

    num_param = 0
    if digits != None:
        num_param += 1
    if places != None:
        num_param += 1
    if delta != None:
        num_param += 1
    if num_param > 1:
        raise TypeError("specify only one of digits, places or delta")

    if digits is not None:
        with suppress_exception:
            diff = log10(abs(test-expected))
            if diff < digits:
                return

        standardMsg = expand_template("{{test}} != {{expected}} within {{digits}} decimal places", locals())
    elif delta is not None:
        if abs(test - expected) <= delta:
            return

        standardMsg = expand_template("{{test}} != {{expected}} within {{delta}} delta", locals())
    else:
        if places is None:
            places = 15

        with suppress_exception:
            diff = mo_math.log10(abs(test-expected))
            if diff < mo_math.ceiling(mo_math.log10(abs(test)))-places:
                return

        standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{places}} places", locals())

    raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")
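
For context: expand_template here comes from the mo-logs package (imported from mo_logs.strings in these projects) and does moustache-style substitution of {{name}} slots from a dict of params. A minimal sketch of the call that builds the assertion messages above, assuming that import path:

    from mo_logs.strings import expand_template

    test, expected = 3.14, 2.72
    msg = expand_template("{{test}} != {{expected}}", {"test": test, "expected": expected})
    # msg == "3.14 != 2.72"
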
Example #2
    def _setup_grcov(self):
        sudo("apt-get install -y gcc")

        response = http.get_json("https://api.github.com/repos/marco-c/grcov/releases/latest")
        with cd("~/ActiveData-ETL"):
            for asset in response.assets:
                if self.settings.grcov.platform in asset.browser_download_url:
                    run("wget "+asset.browser_download_url)
                    run(expand_template("tar xf grcov-{{platform}}.tar.bz2", self.settings.grcov))
                    run(expand_template("rm grcov-{{platform}}.tar.bz2", self.settings.grcov))
Example #3
    def _aggop(self, query):
        """
        SINGLE ROW RETURNED WITH AGGREGATES
        """
        if isinstance(query.select, list):
            # RETURN SINGLE OBJECT WITH AGGREGATES
            for s in query.select:
                if s.aggregate not in aggregates:
                    Log.error("Expecting all columns to have an aggregate: {{select}}", select=s)

            selects = FlatList()
            for s in query.select:
                selects.append(sql_alias(aggregates[s.aggregate].replace("{{code}}", s.value), quote_column(s.name)))

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
            """, {
                "selects": SQL(",\n".join(selects)),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.filter)
            })

            return sql, lambda sql: self.db.column(sql)[0]  # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES
        else:
            # RETURN SINGLE VALUE
            s0 = query.select
            if s0.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=s0)

            select = sql_alias(aggregates[s0.aggregate].replace("{{code}}", s0.value), quote_column(s0.name))

            sql = expand_template("""
                SELECT
                    {{selects}}
                FROM
                    {{table}}
                {{where}}
            """, {
                "selects": SQL(select),
                "table": self._subquery(query["from"])[0],
                "where": self._where2sql(query.where)
            })

            def post(sql):
                result = self.db.column_query(sql)
                return result[0][0]

            return sql, post  # RETURN SINGLE VALUE
Example #4
    def column_query(self, sql, param=None):
        """
        RETURN RESULTS IN [column][row_num] GRID
        """
        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            result = transpose(*grid)

            if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception as e:
            if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
Example #5
    def execute(
        self,
        command,
        param=None,
        retry=True     # IF command FAILS, JUST THROW ERROR
    ):
        if param:
            command = expand_template(command, self.quote_param(param))

        output = None
        done = False
        while not done:
            try:
                with self.locker:
                    if not self.connection:
                        self._connect()

                with Closer(self.connection.cursor()) as curs:
                    curs.execute(command)
                    if curs.rowcount >= 0:
                        output = curs.fetchall()
                self.connection.commit()
                done = True
            except Exception as e:
                with suppress_exception:
                    self.connection.rollback()
                    # TODO: FIGURE OUT WHY rollback() DOES NOT HELP
                    self.connection.close()
                self.connection = None
                self._connect()
                if not retry:
                    Log.error("Problem with command:\n{{command|indent}}",  command= command, cause=e)
        return output
Example #6
 def write(self, template, params):
     value = expand_template(template, params)
     self.locker.acquire()
     try:
         self.writer(value + CR)
     finally:
         self.locker.release()
Example #7
    def _send_email(self):
        try:
            if not self.accumulation:
                return
            with Emailer(self.settings) as emailer:
                # WHO ARE WE SENDING TO
                emails = Data()
                for template, params in self.accumulation:
                    content = expand_template(template, params)
                    emails[literal_field(self.settings.to_address)] += [content]
                    for c in self.cc:
                        if any(d in params.params.error for d in c.contains):
                            emails[literal_field(c.to_address)] += [content]

                # SEND TO EACH
                for to_address, content in emails.items():
                    emailer.send_email(
                        from_address=self.settings.from_address,
                        to_address=listwrap(to_address),
                        subject=self.settings.subject,
                        text_data="\n\n".join(content)
                    )

            self.accumulation = []
        except Exception as e:
            Log.warning("Could not send", e)
        finally:
            self.next_send = Date.now() + self.settings.average_interval * (2 * Random.float())
Example #8
 def write(self, template, params):
     try:
         with self.file_lock:
             self.file.append(expand_template(template, params))
     except Exception as e:
         Log.warning("Problem writing to file {{file}}, waiting...", file=file.name, cause=e)
         time.sleep(5)
Example #9
    def query(self, sql, param=None, stream=False, row_tuples=False):
        """
        RETURN LIST OF dicts
        """
        if not self.cursor:  # ALLOW NON-TRANSACTIONAL READS
            Log.error("must perform all queries inside a transaction")
        self._execute_backlog()

        try:
            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            if row_tuples:
                if stream:
                    result = self.cursor
                else:
                    result = wrap(list(self.cursor))
            else:
                columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
                if stream:
                    result = (wrap({c: utf8_to_unicode(v) for c, v in zip(columns, row)}) for row in self.cursor)
                else:
                    result = wrap([{c: utf8_to_unicode(v) for c, v in zip(columns, row)} for row in self.cursor])

            return result
        except Exception as e:
            e = Except.wrap(e)
            if "InterfaceError" in e:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
Example #10
    def _send_email(self):
        try:
            if not self.accumulation:
                return
            with Closer(connect_to_region(
                self.settings.region,
                aws_access_key_id=unwrap(self.settings.aws_access_key_id),
                aws_secret_access_key=unwrap(self.settings.aws_secret_access_key)
            )) as conn:

                # WHO ARE WE SENDING TO
                emails = Data()
                for template, params in self.accumulation:
                    content = expand_template(template, params)
                    emails[literal_field(self.settings.to_address)] += [content]
                    for c in self.cc:
                        if any(d in params.params.error for d in c.contains):
                            emails[literal_field(c.to_address)] += [content]

                # SEND TO EACH
                for to_address, content in emails.items():
                    conn.send_email(
                        source=self.settings.from_address,
                        to_addresses=listwrap(to_address),
                        subject=self.settings.subject,
                        body="\n\n".join(content),
                        format="text"
                    )

            self.next_send = Date.now() + self.settings.max_interval
            self.accumulation = []
        except Exception as e:
            self.next_send = Date.now() + self.settings.max_interval
            Log.warning("Could not send", e)
Example #11
    def forall(self, sql, param=None, _execute=None):
        assert _execute
        num = 0

        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
            self.cursor.execute(sql)

            columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
            for r in self.cursor:
                num += 1
                _execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))

            if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

        except Exception as e:
            Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)

        return num
Example #12
 def quote_value(self, value):
     """
     convert values to equivalent mysql code
     mostly delegates directly to the mysql lib, but some exceptions exist
     """
     try:
         if value == None:
             return SQL("NULL")
         elif isinstance(value, SQL):
             if not value.param:
                 # value.template CAN BE MORE THAN A TEMPLATE STRING
                 return self.quote_sql(value.template)
             param = {k: self.quote_sql(v) for k, v in value.param.items()}
             return SQL(expand_template(value.template, param))
         elif isinstance(value, basestring):
             return SQL(self.db.literal(value))
         elif isinstance(value, Mapping):
             return SQL(self.db.literal(json_encode(value)))
         elif Math.is_number(value):
             return SQL(unicode(value))
         elif isinstance(value, datetime):
             return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')")
         elif isinstance(value, Date):
             return SQL("str_to_date('"+value.format("%Y%m%d%H%M%S.%f")+"', '%Y%m%d%H%i%s.%f')")
         elif hasattr(value, '__iter__'):
             return SQL(self.db.literal(json_encode(value)))
         else:
             return self.db.literal(value)
     except Exception as e:
         Log.error("problem quoting SQL", e)
Example #13
    def to_es_script(self, schema, not_null=False, boolean=False, many=True):
        term = FirstOp(self.term).partial_eval()
        value = term.to_es_script(schema)

        if is_op(value.frum, CoalesceOp_):
            return CoalesceOp(
                [StringOp(t).partial_eval() for t in value.frum.terms]
            ).to_es_script(schema)

        if value.miss is TRUE or value.type is IS_NULL:
            return empty_string_script
        elif value.type == BOOLEAN:
            return EsScript(
                miss=self.term.missing().partial_eval(),
                type=STRING,
                expr=value.expr + ' ? "T" : "F"',
                frum=self,
                schema=schema,
            )
        elif value.type == INTEGER:
            return EsScript(
                miss=self.term.missing().partial_eval(),
                type=STRING,
                expr="String.valueOf(" + value.expr + ")",
                frum=self,
                schema=schema,
            )
        elif value.type == NUMBER:
            return EsScript(
                miss=self.term.missing().partial_eval(),
                type=STRING,
                expr=expand_template(NUMBER_TO_STRING, {"expr": value.expr}),
                frum=self,
                schema=schema,
            )
        elif value.type == STRING:
            return value
        else:
            return EsScript(
                miss=self.term.missing().partial_eval(),
                type=STRING,
                expr=expand_template(NUMBER_TO_STRING, {"expr": value.expr}),
                frum=self,
                schema=schema,
            )
Example #14
    def execute(self, sql, param=None):
        if self.transaction_level == 0:
            Log.error("Expecting transaction to be started before issuing queries")

        if param:
            sql = expand_template(sql, quote_param(param))
        sql = outdent(sql)
        self.backlog.append(sql)
        if self.debug or len(self.backlog) >= MAX_BATCH_SIZE:
            self._execute_backlog()
Example #15
 def to_es_script(self, schema, not_null=False, boolean=False, many=True):
     return EsScript(
         miss=FALSE,
         type=INTEGER,
         expr=expand_template(
             _count_template,
             {"expr": Painless[self.terms].partial_eval().to_es_script(schema).expr},
         ),
         frum=self,
         schema=schema,
     )
Example #16
    def write(self, template, params):
        try:
            log_line = expand_template(template, params)
            level = max(self.min_level, MAP[params.context])
            self.logger.log(level, log_line)
            self.count += 1
        except Exception as cause:
            cause = exceptions.Except.wrap(cause)
            import sys

            sys.stderr.write("can not write to logger: " + text(cause))
Example #17
 def write(self, template, params):
     try:
         with self.file_lock:
             self.file.append(expand_template(template, params))
     except Exception as e:
         Log.warning(
             "Problem writing to file {{file}}, waiting...",
             file=self.file.name,
             cause=e,
         )
         time.sleep(5)
Example #18
 def to_es_script(self, schema, not_null=False, boolean=False, many=True):
     return EsScript(
         miss=FALSE,
         type=INTEGER,
         expr=expand_template(
             _count_template,
             {"expr": Painless[self.terms].partial_eval().to_es_script(schema).expr},
         ),
         frum=self,
         schema=schema,
     )
Example #19
    def append_query(self, es_query, start):
        self.start = start

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        es_query = wrap({"aggs": {
            "_match": set_default({"terms": {
                "script":  expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
            }}, es_query)
        }})

        return es_query
Example #20
    def execute(self, sql, param=None):
        if self.transaction_level == 0:
            Log.error(
                "Expecting transaction to be started before issuing queries")

        if param:
            sql = expand_template(sql, self.quote_param(param))
        sql = outdent(sql)
        self.backlog.append(sql)
        if self.debug or len(self.backlog) >= MAX_BATCH_SIZE:
            self._execute_backlog()
Example #21
    def append_query(self, es_query, start):
        self.start = start

        es_field = self.query.frum.schema.leaves(self.var)[0].es_column
        es_query = wrap({"aggs": {
            "_match": set_default({"terms": {
                "script":  expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'})
            }}, es_query)
        }})

        return es_query
Example #22
    def query(self, sql, param=None, stream=False, row_tuples=False):
        """
        RETURN A LIST OF dicts

        :param sql:  SQL TEMPLATE TO SEND
        :param param: PARAMETERS TO INJECT INTO SQL TEMPLATE
        :param stream: STREAM OUTPUT
        :param row_tuples: DO NOT RETURN dicts
        """
        if not self.cursor:  # ALLOW NON-TRANSACTIONAL READS
            Log.error("must perform all queries inside a transaction")
        self._execute_backlog()

        try:
            if isinstance(sql, SQL):
                sql = text(sql)
            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            if row_tuples:
                if stream:
                    result = self.cursor
                else:
                    result = wrap(list(self.cursor))
            else:
                columns = tuple(
                    utf8_to_unicode(d[0])
                    for d in coalesce(self.cursor.description, []))

                def streamer():
                    for row in self.cursor:
                        output = Data()
                        for c, v in zip(columns, row):
                            output[c] = v
                        yield output

                if stream:
                    result = streamer()
                else:
                    result = wrap(streamer())

            return result
        except Exception as e:
            e = Except.wrap(e)
            if "InterfaceError" in e:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",
                      sql=sql,
                      cause=e,
                      stack_depth=1)
Example #23
    def append_query(self, query_path, es_query):
        es_field = first(self.query.frum.schema.leaves(self.var)).es_column

        return Aggs().add(
            TermsAggs(
                "_match", {
                    "script":
                    expand_template(
                        LIST_TO_PIPE,
                        {"expr": 'doc[' + quote(es_field) + '].values'}),
                    "size":
                    self.limit
                }, self).add(es_query))
Example #24
        def inner(changeset_id):
            if self.es.cluster.version.startswith("1.7."):
                query = {
                    "query": {"filtered": {
                        "query": {"match_all": {}},
                        "filter": {"and": [
                            {"prefix": {"changeset.id": changeset_id}},
                            {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                        ]}
                    }},
                    "size": 1
                }
            else:
                query = {
                    "query": {"bool": {"must": [
                        {"prefix": {"changeset.id": changeset_id}},
                        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                    ]}},
                    "size": 1
                }

            try:
                # ALWAYS TRY ES FIRST
                with self.es_locker:
                    response = self.es.search(query)
                    json_diff = response.hits.hits[0]._source.changeset.diff
                if json_diff:
                    return json_diff
            except Exception as e:
                pass

            url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id})
            if DEBUG:
                Log.note("get unified diff from {{url}}", url=url)
            try:
                response = http.get(url)
                diff = response.content.decode("utf8", "replace")
                json_diff = diff_to_json(diff)
                num_changes = _count(c for f in json_diff for c in f.changes)
                if json_diff:
                    if num_changes < MAX_DIFF_SIZE:
                        return json_diff
                    elif revision.changeset.description.startswith("merge "):
                        return None  # IGNORE THE MERGE CHANGESETS
                    else:
                        Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes)
                        for file in json_diff:
                            file.changes = None
                        return json_diff
            except Exception as e:
                Log.warning("could not get unified diff", cause=e)
Example #25
def table2csv(table_data):
    """
    :param table_data: expecting a list of tuples
    :return: text in nice formatted csv
    """
    text_data = [tuple(value2json(vals, pretty=True) for vals in rows) for rows in table_data]

    col_widths = [max(len(text) for text in cols) for cols in zip(*text_data)]
    template = ", ".join(
        "{{" + unicode(i) + "|left_align(" + unicode(w) + ")}}"
        for i, w in enumerate(col_widths)
    )
    text = "\n".join(expand_template(template, d) for d in text_data)
    return text
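
A hypothetical call to the function above, showing how the generated template uses the left_align filter to pad each column to its widest cell:

    rows = [(1, "a"), (10, "bb")]
    print(table2csv(rows))
    # columns are left-aligned to their widest entry, roughly:
    # 1 , "a"
    # 10, "bb"
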
Example #26
    def fill_container(self, subtest, typed=True):
        """
        RETURN SETTINGS THAT CAN BE USED TO POINT TO THE INDEX THAT'S FILLED
        """
        subtest = wrap(subtest)
        _settings = self._es_test_settings  # ALREADY COPIED AT setUp()

        try:
            url = "file://resources/schema/basic_schema.json.template?{{.|url}}"
            url = expand_template(url, {
                "type": _settings.type,
                "metadata": subtest.metadata
            })
            _settings.schema = mo_json_config.get(url)

            # MAKE CONTAINER
            container = self._es_cluster.get_or_create_index(
                typed=typed,
                schema=subtest.schema or _settings.schema,
                kwargs=_settings
            )
            container.add_alias(_settings.index)

            _settings.alias = container.settings.alias
            _settings.index = container.settings.index
            ESUtils.indexes.append(_settings.index)

            # INSERT DATA
            if '"null"' in value2json(subtest.data):
                Log.error("not expected")
            container.extend([{"value": d} for d in subtest.data])
            container.flush()

            now = Date.now()
            namespace = ElasticsearchMetadata(self._es_cluster.settings)
            namespace.get_columns(_settings.alias, after=now)  # FORCE A RELOAD

            # ENSURE query POINTS TO CONTAINER
            frum = subtest.query["from"]
            if frum == None:
                subtest.query["from"] = _settings.alias
            elif is_text(frum):
                subtest.query["from"] = frum.replace(test_jx.TEST_TABLE, _settings.alias)
            else:
                Log.error("Do not know how to handle")

        except Exception as e:
            Log.error("can not load {{data}} into container", data=subtest.data, cause=e)

        return _settings
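
Note the {{.|url}} suffix on the template URL above: the lone dot names the whole params object, and the url filter encodes it as a query string for mo_json_config. A sketch of just that expansion, with the dot/url behavior stated as an assumption:

    from mo_logs.strings import expand_template

    url = expand_template(
        "file://resources/schema/basic_schema.json.template?{{.|url}}",
        {"type": "test_result"}
    )
    # roughly: file://resources/schema/basic_schema.json.template?type=test_result
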
Example #27
def execute_sql(
    host,
    username,
    password,
    sql,
    schema=None,
    param=None,
    kwargs=None
):
    """EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?"""
    kwargs.schema = coalesce(kwargs.schema, kwargs.database)

    if param:
        with MySQL(kwargs) as temp:
            sql = expand_template(sql, quote_param(param))

    # We have no way to execute an entire SQL file in bulk, so we
    # have to shell out to the commandline client.
    args = [
        "mysql",
        "-h{0}".format(host),
        "-u{0}".format(username),
        "-p{0}".format(password)
    ]
    if schema:
        args.append("{0}".format(schema))

    try:
        proc = subprocess.Popen(
            args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=-1
        )
        if is_text(sql):
            sql = sql.encode("utf8")
        (output, _) = proc.communicate(sql)
    except Exception as e:
        raise Log.error("Can not call \"mysql\"", e)

    if proc.returncode:
        if len(sql) > 10000:
            sql = "<" + text(len(sql)) + " bytes of sql>"
        Log.error(
            "Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n",
            sql=indent(sql),
            return_code=proc.returncode,
            output=output
        )
Example #28
        def inner(changeset_id):
            if self.es.cluster.version.startswith("1.7."):
                query = {
                    "query": {"filtered": {
                        "query": {"match_all": {}},
                        "filter": {"and": [
                            {"prefix": {"changeset.id": changeset_id}},
                            {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                        ]}
                    }},
                    "size": 1
                }
            else:
                query = {
                    "query": {"bool": {"must": [
                        {"prefix": {"changeset.id": changeset_id}},
                        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                    ]}},
                    "size": 1
                }

            try:
                # ALWAYS TRY ES FIRST
                with self.es_locker:
                    response = self.es.search(query)
                    json_diff = response.hits.hits[0]._source.changeset.diff
                if json_diff:
                    return json_diff
            except Exception as e:
                pass

            url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id})
            DEBUG and Log.note("get unified diff from {{url}}", url=url)
            try:
                response = http.get(url)
                diff = response.content.decode("utf8")
                json_diff = diff_to_json(diff)
                num_changes = _count(c for f in json_diff for c in f.changes)
                if json_diff:
                    if revision.changeset.description.startswith("merge "):
                        return None  # IGNORE THE MERGE CHANGESETS
                    elif num_changes < MAX_DIFF_SIZE:
                        return json_diff
                    else:
                        Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes)
                        for file in json_diff:
                            file.changes = None
                        return json_diff
            except Exception as e:
                Log.warning("could not get unified diff from {{url}}", url=url, cause=e)
Example #29
def table2csv(table_data):
    """
    :param table_data: expecting a list of tuples
    :return: text in nice formatted csv
    """
    text_data = [tuple(value2json(vals, pretty=True) for vals in rows) for rows in table_data]

    col_widths = [max(len(t) for t in cols) for cols in zip(*text_data)]
    template = ", ".join(
        "{{" + text(i) + "|left_align(" + text(w) + ")}}"
        for i, w in enumerate(col_widths)
    )
    output = "\n".join(expand_template(template, d) for d in text_data)
    return output
Example #30
def execute_sql(
    host,
    username,
    password,
    sql,
    schema=None,
    param=None,
    kwargs=None
):
    """EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?"""
    kwargs.schema = coalesce(kwargs.schema, kwargs.database)

    if param:
        with MySQL(kwargs) as temp:
            sql = expand_template(sql, quote_param(param))

    # We have no way to execute an entire SQL file in bulk, so we
    # have to shell out to the commandline client.
    args = [
        "mysql",
        "-h{0}".format(host),
        "-u{0}".format(username),
        "-p{0}".format(password)
    ]
    if schema:
        args.append("{0}".format(schema))

    try:
        proc = subprocess.Popen(
            args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=-1
        )
        if is_text(sql):
            sql = sql.encode("utf8")
        (output, _) = proc.communicate(sql)
    except Exception as e:
        raise Log.error("Can not call \"mysql\"", e)

    if proc.returncode:
        if len(sql) > 10000:
            sql = "<" + text_type(len(sql)) + " bytes of sql>"
        Log.error(
            "Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n",
            sql=indent(sql),
            return_code=proc.returncode,
            output=output
        )
Example #31
def to_ruby(self, schema):
    term = FirstOp("first", self.term).partial_eval()
    value = term.to_ruby(schema)

    if isinstance(value.frum, CoalesceOp):
        return CoalesceOp("coalesce", [StringOp("string", t).partial_eval() for t in value.frum.terms]).to_ruby(schema)

    if value.type == BOOLEAN:
        return Ruby(
            miss=self.term.missing().partial_eval(),
            type=STRING,
            expr=value.expr + ' ? "T" : "F"',
            frum=self
        )
    elif value.type == INTEGER:
        return Ruby(
            miss=self.term.missing().partial_eval(),
            type=STRING,
            expr="String.valueOf(" + value.expr + ")",
            frum=self
        )
    elif value.type == NUMBER:
        return Ruby(
            miss=self.term.missing().partial_eval(),
            type=STRING,
            expr=expand_template(TO_STRING, {"expr":value.expr}),
            frum=self
        )
    elif value.type == STRING:
        return value
    else:
        return Ruby(
            miss=self.term.missing().partial_eval(),
            type=STRING,
            expr=expand_template(TO_STRING, {"expr":value.expr}),
            frum=self
        )
Example #32
def to_es_script(self, schema):
    term = FirstOp("first", self.term).partial_eval()
    value = term.to_es_script(schema)

    if isinstance(value.frum, CoalesceOp):
        return CoalesceOp("coalesce", [StringOp("string", t).partial_eval() for t in value.frum.terms]).to_es_script(schema)

    if value.type == BOOLEAN:
        return EsScript(
            miss=self.term.missing().partial_eval(),
            type=STRING,
            expr=value.expr + ' ? "T" : "F"',
            frum=self
        )
    elif value.type == INTEGER:
        return EsScript(
            miss=self.term.missing().partial_eval(),
            type=STRING,
            expr="String.valueOf(" + value.expr + ")",
            frum=self
        )
    elif value.type == NUMBER:
        return EsScript(
            miss=self.term.missing().partial_eval(),
            type=STRING,
            expr=expand_template(TO_STRING, {"expr":value.expr}),
            frum=self
        )
    elif value.type == STRING:
        return value
    else:
        return EsScript(
            miss=self.term.missing().partial_eval(),
            type=STRING,
            expr=expand_template(TO_STRING, {"expr":value.expr}),
            frum=self
        )
Example #33
def compileString2Term(edge):
    if edge.esscript:
        Log.error("edge script not supported yet")

    value = edge.value
    if is_variable_name(value):
        value = strings.expand_template("getDocValue({{path}})",
                                        {"path": quote(value)})
    else:
        Log.error("not handled")

    def fromTerm(value):
        return edge.domain.getPartByKey(value)

    return Data(toTerm={"head": "", "body": value}, fromTerm=fromTerm)
Example #34
 def test_round(self):
     expected = [
         "0.0000000003142", "0.000000003142", "0.00000003142",
         "0.0000003142", "0.000003142", "0.00003142", "0.0003142",
         "0.003142", "0.03142", "0.3142", "3.142", "31.42", "314.2", "3142",
         "31420", "314200", "3142000", "31420000", "314200000",
         "3142000000", "31420000000", "314200000000", "3142000000000",
         "31420000000000", "314200000000000", "3142000000000000",
         "31420000000000000", "314200000000000000"
     ]
     start = -10
     for order in range(start, 18):
         value = pi * (10**order)
         test = expand_template("{{value|round(places=4)}}",
                                value={"value": value})
         self.assertEqual(test, expected[order - start])
Example #35
    def query(self, sql, param=None, stream=False, row_tuples=False):
        """
        RETURN LIST OF dicts
        """
        if not self.cursor:  # ALLOW NON-TRANSACTIONAL READS
            Log.error("must perform all queries inside a transaction")
        self._execute_backlog()

        try:
            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            if row_tuples:
                if stream:
                    result = self.cursor
                else:
                    result = wrap(list(self.cursor))
            else:
                columns = [
                    utf8_to_unicode(d[0])
                    for d in coalesce(self.cursor.description, [])
                ]
                if stream:
                    result = (wrap(
                        {c: utf8_to_unicode(v)
                         for c, v in zip(columns, row)})
                              for row in self.cursor)
                else:
                    result = wrap(
                        [{c: utf8_to_unicode(v)
                          for c, v in zip(columns, row)}
                         for row in self.cursor])

            return result
        except Exception as e:
            if isinstance(
                    e,
                    InterfaceError) or e.message.find("InterfaceError") >= 0:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",
                      sql=sql,
                      cause=e,
                      stack_depth=1)
Example #36
def compileString2Term(edge):
    if edge.esscript:
        Log.error("edge script not supported yet")

    value = edge.value
    if is_variable_name(value):
        value = strings.expand_template("getDocValue({{path}})", {"path": quote(value)})
    else:
        Log.error("not handled")

    def fromTerm(value):
        return edge.domain.getPartByKey(value)

    return Data(
        toTerm={"head": "", "body": value},
        fromTerm=fromTerm
    )
Example #37
    def __unicode__(self):
        output = self.type + ": " + self.template + "\n"
        if self.params:
            output = expand_template(output, self.params)

        if self.trace:
            output += indent(format_trace(self.trace))

        if self.cause:
            cause_strings = []
            for c in listwrap(self.cause):
                with suppress_exception:
                    cause_strings.append(text_type(c))

            output += "caused by\n\t" + "and caused by\n\t".join(cause_strings)

        return output
Example #38
    def __unicode__(self):
        output = self.type + ": " + self.template + "\n"
        if self.params:
            output = expand_template(output, self.params)

        if self.trace:
            output += indent(format_trace(self.trace))

        if self.cause:
            cause_strings = []
            for c in listwrap(self.cause):
                with suppress_exception:
                    cause_strings.append(text_type(c))

            output += "caused by\n\t" + "and caused by\n\t".join(cause_strings)

        return output
Example #39
def time_delta_pusher(please_stop, appender, queue, interval):
    """
    appender - THE FUNCTION THAT ACCEPTS A STRING
    queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE
    interval - timedelta
    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL
    """

    next_run = time() + interval

    while not please_stop:
        profiler = Thread.current().cprofiler
        profiler.disable()
        (Till(till=next_run) | please_stop).wait()
        profiler.enable()

        next_run = time() + interval
        logs = queue.pop_all()
        if not logs:
            continue

        lines = []
        for log in logs:
            try:
                if log is THREAD_STOP:
                    please_stop.go()
                    next_run = time()
                else:
                    expanded = expand_template(log.get("template"),
                                               log.get("params"))
                    lines.append(expanded)
            except Exception as e:
                location = log.get('params', {}).get('location', {})
                Log.warning("Trouble formatting log from {{location}}",
                            location=location,
                            cause=e)
                # SWALLOW ERROR, GOT TO KEEP RUNNING
        try:
            appender(u"\n".join(lines) + u"\n")
        except Exception as e:
            sys.stderr.write(
                str("Trouble with appender: ") + str(e.__class__.__name__) +
                str("\n"))
Example #40
    def __unicode__(self):
        output = self.context + ": " + self.template + CR
        if self.params:
            output = expand_template(output, self.params)

        if self.trace:
            output += indent(format_trace(self.trace))

        if self.cause:
            cause_strings = []
            for c in listwrap(self.cause):
                try:
                    cause_strings.append(text_type(c))
                except Exception as e:
                    sys.stderr("Problem serializing cause" + text_type(c))

            output += "caused by\n\t" + "and caused by\n\t".join(cause_strings)

        return output
Example #41
    def __unicode__(self):
        output = self.context + ": " + self.template + CR
        if self.params:
            output = expand_template(output, self.params)

        if self.trace:
            output += indent(format_trace(self.trace))

        if self.cause:
            cause_strings = []
            for c in listwrap(self.cause):
                try:
                    cause_strings.append(text_type(c))
                except Exception as e:
                    sys.stderr("Problem serializing cause"+text_type(c))

            output += "caused by\n\t" + "and caused by\n\t".join(cause_strings)

        return output
Example #42
 def quote_sql(self, value, param=None):
     """
     USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT
     """
     try:
         if isinstance(value, SQL):
             if not param:
                 return value
             param = {k: self.quote_sql(v) for k, v in param.items()}
             return expand_template(value, param)
         elif isinstance(value, text_type):
             return value
         elif isinstance(value, Mapping):
             return self.db.literal(json_encode(value))
         elif hasattr(value, '__iter__'):
             return sql_iso(sql_list([self.quote_sql(vv) for vv in value]))
         else:
             return text_type(value)
     except Exception as e:
         Log.error("problem quoting SQL", e)
Example #43
 def quote_sql(self, value, param=None):
     """
     USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT
     """
     try:
         if isinstance(value, SQL):
             if not param:
                 return value
             param = {k: self.quote_sql(v) for k, v in param.items()}
             return expand_template(value, param)
         elif isinstance(value, basestring):
             return value
         elif isinstance(value, Mapping):
             return self.db.literal(json_encode(value))
         elif hasattr(value, '__iter__'):
             return "(" + ",".join([self.quote_sql(vv) for vv in value]) + ")"
         else:
             return unicode(value)
     except Exception as e:
         Log.error("problem quoting SQL", e)
Example #44
def quote_sql(value, param=None):
    """
    USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT
    """
    try:
        if isinstance(value, SQL):
            if not param:
                return value
            param = {k: quote_sql(v) for k, v in param.items()}
            return SQL(expand_template(value, param))
        elif is_text(value):
            return SQL(value)
        elif is_data(value):
            return quote_value(json_encode(value))
        elif hasattr(value, '__iter__'):
            return quote_list(value)
        else:
            return text_type(value)
    except Exception as e:
        Log.error("problem quoting SQL", e)
Example #45
def quote_sql(value, param=None):
    """
    USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT
    """
    try:
        if isinstance(value, SQL):
            if not param:
                return value
            param = {k: quote_sql(v) for k, v in param.items()}
            return SQL(expand_template(value, param))
        elif is_text(value):
            return SQL(value)
        elif is_data(value):
            return quote_value(json_encode(value))
        elif hasattr(value, '__iter__'):
            return quote_list(value)
        else:
            return text(value)
    except Exception as e:
        Log.error("problem quoting SQL", e)
Example #46
    def query(self, sql, param=None):
        """
        RETURN LIST OF dicts
        """
        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            columns = [
                utf8_to_unicode(d[0])
                for d in coalesce(self.cursor.description, [])
            ]
            fixed = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            result = convert.table2list(columns, fixed)

            if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception as e:
            if isinstance(
                    e,
                    InterfaceError) or e.message.find("InterfaceError") >= 0:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",
                      sql=sql,
                      cause=e,
                      stack_depth=1)
Example #47
def time_delta_pusher(please_stop, appender, queue, interval):
    """
    appender - THE FUNCTION THAT ACCEPTS A STRING
    queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE
    interval - timedelta
    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL
    """

    next_run = time() + interval

    while not please_stop:
        profiler = Thread.current().cprofiler
        profiler.disable()
        (Till(till=next_run) | please_stop).wait()
        profiler.enable()

        next_run = time() + interval
        logs = queue.pop_all()
        if not logs:
            continue

        lines = []
        for log in logs:
            try:
                if log is THREAD_STOP:
                    please_stop.go()
                    next_run = time()
                else:
                    expanded = expand_template(log.get("template"), log.get("params"))
                    lines.append(expanded)
            except Exception as e:
                location = log.get('params', {}).get('location', {})
                Log.warning("Trouble formatting log from {{location}}", location=location, cause=e)
                # SWALLOW ERROR, GOT TO KEEP RUNNING
        try:
            appender(u"\n".join(lines) + u"\n")
        except Exception as e:
            sys.stderr.write(str("Trouble with appender: ") + str(e.__class__.__name__) + str("\n"))
Example #48
        def inner(changeset_id):
            if self.es.cluster.version.startswith("1.7."):
                query = {
                    "query": {"filtered": {
                        "query": {"match_all": {}},
                        "filter": {"and": [
                            {"prefix": {"changeset.id": changeset_id}},
                            {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                        ]}
                    }},
                    "size": 1
                }
            else:
                query = {
                    "query": {"bool": {"must": [
                        {"prefix": {"changeset.id": changeset_id}},
                        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                    ]}},
                    "size": 1
                }

            try:
                # ALWAYS TRY ES FIRST
                with self.es_locker:
                    response = self.es.search(query)
                    moves = response.hits.hits[0]._source.changeset.moves
                if moves:
                    return moves
            except Exception as e:
                pass

            url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id})
            DEBUG and Log.note("get unified diff from {{url}}", url=url)
            try:
                moves = http.get(url).content.decode('latin1')  # THE ENCODING DOES NOT MATTER BECAUSE WE ONLY USE THE '+', '-' PREFIXES IN THE DIFF
                return diff_to_moves(text_type(moves))
            except Exception as e:
                Log.warning("could not get unified diff from {{url}}", url=url, cause=e)
Example #49
    def column_query(self, sql, param=None):
        """
        RETURN RESULTS IN [column][row_num] GRID
        """
        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            result = transpose(*grid)

            if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception as e:
            e = Except.wrap(e)
            if "InterfaceError" in e:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",
                      sql=sql,
                      cause=e,
                      stack_depth=1)
Example #50
    def fill_container(self, subtest, tjson=False):
        """
        RETURN SETTINGS THAT CAN BE USED TO POINT TO THE INDEX THAT'S FILLED
        """
        subtest = wrap(subtest)
        _settings = self._es_test_settings  # ALREADY COPIED AT setUp()
        # _settings.index = "testing_" + Random.hex(10).lower()
        # settings.type = "test_result"

        try:
            url = "file://resources/schema/basic_schema.json.template?{{.|url}}"
            url = expand_template(url, {
                "type": _settings.type,
                "metadata": subtest.metadata
            })
            _settings.schema = mo_json_config.get(url)

            # MAKE CONTAINER
            container = self._es_cluster.get_or_create_index(tjson=tjson, kwargs=_settings)
            container.add_alias(_settings.index)

            # INSERT DATA
            container.extend([
                {"value": v} for v in subtest.data
            ])
            container.flush()
            # ENSURE query POINTS TO CONTAINER
            frum = subtest.query["from"]
            if frum == None:
                subtest.query["from"] = _settings.index
            elif isinstance(frum, basestring):
                subtest.query["from"] = frum.replace(TEST_TABLE, _settings.index)
            else:
                Log.error("Do not know how to handle")
        except Exception as e:
            Log.error("can not load {{data}} into container", {"data": subtest.data}, e)
Example #51
def replace_vars(text, params=None):
    """
    REPLACE {{vars}} WITH ENVIRONMENTAL VALUES
    """
    start = 0
    var = strings.between(text, "{{", "}}", start)
    while var:
        replace = "{{" + var + "}}"
        index = text.find(replace, 0)
        if index == -1:
            Log.error("could not find {{var}} (including quotes)", var=replace)
        end = index + len(replace)

        try:
            replacement = text_type(Date(var).unix)
            text = text[:index] + replacement + text[end:]
            start = index + len(replacement)
        except Exception as _:
            start += 1

        var = strings.between(text, "{{", "}}", start)

    text = expand_template(text, coalesce(params, {}))
    return text
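
A hypothetical usage: each {{...}} span that parses as a mo-times date expression (today-week, now-hour, ...) is replaced with its unix timestamp in the first pass, and whatever remains is filled from params by expand_template:

    query_text = '{"gte": {{today-week}}, "branch": {{branch}}}'
    result = replace_vars(query_text, {"branch": '"mozilla-central"'})
    # {{today-week}} becomes a number such as 1514764800; {{branch}} comes from params
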
Example #52
def es_aggsop(es, frum, query):
    query = query.copy()  # WE WILL MARK UP THIS QUERY
    schema = frum.schema
    select = listwrap(query.select)

    es_query = Data()
    new_select = Data()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            if schema.query_path == ".":
                s.pull = jx_expression_to_function("doc_count")
            else:
                s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]})
        elif isinstance(s.value, Variable):
            if s.aggregate == "count":
                new_select["count_"+literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        elif s.aggregate:
            formula.append(s)

    for canonical_name, many in new_select.items():
        for s in many:
            columns = frum.schema.values(s.value.var)

            if s.aggregate == "count":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_count")
                    if column.jx_type == EXISTS:
                        canonical_names.append(cn + ".doc_count")
                        es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
                    else:
                        canonical_names.append(cn+ ".value")
                        es_query.aggs[cn].value_count.field = column.es_column
                if len(canonical_names) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0])
                else:
                    s.pull = jx_expression_to_function({"add": canonical_names})
            elif s.aggregate == "median":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = jx_expression_to_function(key + ".values.50\\.0")
            elif s.aggregate == "percentile":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                if isinstance(s.percentile, text_type) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [percent]
                es_query.aggs[key].percentiles.tdigest.compression = 2
                s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
            elif s.aggregate == "cardinality":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_cardinality")
                    canonical_names.append(cn)
                    es_query.aggs[cn].cardinality.field = column.es_column
                if len(columns) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0] + ".value")
                else:
                    s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
            elif s.aggregate == "stats":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = columns[0].es_column

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + "_percentile")
                es_query.aggs[median_name].percentiles.field = columns[0].es_column
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = get_pull_stats(stats_name, median_name)
            elif s.aggregate == "union":
                pulls = []
                for column in columns:
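                    # COLLECT DISTINCT VALUES INTO A HashSet PER SHARD,
                    # THEN MERGE THE PER-SHARD ARRAYS IN THE reduce STEP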
                    script = {"scripted_metric": {
                        'init_script': 'params._agg.terms = new HashSet()',
                        'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v);',
                        'combine_script': 'return params._agg.terms.toArray()',
                        'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
                    }}
                    stats_name = encode_property(column.es_column)
                    if column.nested_path[0] == ".":
                        es_query.aggs[stats_name] = script
                        pulls.append(jx_expression_to_function(stats_name + ".value"))
                    else:
                        es_query.aggs[stats_name] = {
                            "nested": {"path": column.nested_path[0]},
                            "aggs": {"_nested": script}
                        }
                        pulls.append(jx_expression_to_function(stats_name + "._nested.value"))

                if len(pulls) == 0:
                    s.pull = NULL
                elif len(pulls) == 1:
                    s.pull = pulls[0]
                else:
                    # BIND pulls AS A DEFAULT ARG; THE NAME IS REBOUND ON THE NEXT LOOP ITERATION
                    s.pull = lambda row, pulls=pulls: UNION(p(row) for p in pulls)
            else:
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                elif len(columns) < 1:
                    # NO MATCHING COLUMNS; PULL A null VALUE
                    s.pull = jx_expression_to_function({"null": {}})
                else:
                    # PULL VALUE OUT OF THE stats AGGREGATE
                    es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
                    s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})

    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)

        if isinstance(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                s.pull = jx_expression_to_function("doc_count")
            elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
                if s.aggregate in ('max', 'maximum'):
                    dir = 1
                    op = "max"
                else:
                    dir = -1
                    op = "min"

                nully = TupleOp("tuple", [NULL]*len(s.value.terms)).partial_eval().to_es_script(schema).expr
                selfy = s.value.partial_eval().to_es_script(schema).expr

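                # KEEP THE RUNNING best TUPLE PER SHARD; THE reduce STEP PICKS
                # THE max/min TUPLE ACROSS SHARDS USING LEXICOGRAPHIC COMPARE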
                script = {"scripted_metric": {
                    'init_script': 'params._agg.best = ' + nully + ';',
                    'map_script': 'params._agg.best = ' + expand_template(MAX_OF_TUPLE, {"expr1": "params._agg.best", "expr2": selfy, "dir": dir, "op": op}) + ";",
                    'combine_script': 'return params._agg.best',
                    'reduce_script': 'return params._aggs.stream().max(' + expand_template(COMPARE_TUPLE, {"dir": dir, "op": op}) + ').get()',
                }}
                if schema.query_path[0] == ".":
                    es_query.aggs[canonical_name] = script
                    s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
                else:
                    es_query.aggs[canonical_name] = {
                        "nested": {"path": schema.query_path[0]},
                        "aggs": {"_nested": script}
                    }
                    s.pull = jx_expression_to_function(literal_field(canonical_name) + "._nested.value")
            else:
               Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
        elif s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = jx_expression_to_function(key + ".values.50\\.0")
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(key + ".value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = get_pull_stats(stats_name, median_name)
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(stats_name + ".buckets.key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
            es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

    decoders = get_decoders_by_depth(query)
    start = 0

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum.schema)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            #TODO: INCLUDE FILTERS ON EDGES
            filter_ = AndOp("and", split_where[1]).to_esfilter(schema)
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {"nested": {"path": schema.query_path[0]}},
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    if decoders:
        for d in jx.reverse(decoders[0]):
            es_query = d.append_query(es_query, start)
            start += d.num_columns

    if split_where[0]:
        #TODO: INCLUDE FILTERS ON EDGES
        filter = AndOp("and", split_where[0]).to_esfilter(schema)
        es_query = Data(
            aggs={"_filter": set_default({"filter": filter}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es_post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", cause=e)
Example #53
0
def json2value(json_string, params=Null, flexible=False, leaves=False):
    """
    :param json_string: THE JSON
    :param params: STANDARD JSON PARAMS
    :param flexible: REMOVE COMMENTS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py# L58
            json_string = re.sub(r"\"\"\".*?\"\"\"",
                                 r"\n",
                                 json_string,
                                 flags=re.MULTILINE)
            json_string = "\n".join(
                remove_line_comment(l) for l in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        try:
            value = wrap(json_decoder(unicode(json_string)))
        except Exception as e:
            Log.error("can not decode\n{{content}}",
                      content=json_string,
                      cause=e)

        if leaves:
            value = wrap_leaves(value)

        return value

    except Exception as e:
        e = Except.wrap(e)

        if not json_string.strip():
            Log.error("JSON string is only whitespace")

        c = e
        while "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause

        if "Expecting '" in c and "' delimiter: line" in c:
            line_index = int(strings.between(c.message, " line ",
                                             " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                sample = "..." + line[column - 20:]
                pointer = "   " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error("Can not decode JSON at:\n\t" + sample + "\n\t" +
                      pointer + "\n")

        base_str = strings.limit(json_string, 1000).encode('utf8')
        hexx_str = bytes2hex(base_str, " ")
        try:
            char_str = " " + "  ".join(
                (c.decode("latin1") if ord(c) >= 32 else ".")
                for c in base_str)
        except Exception as e:
            char_str = " "
        Log.error("Can not decode JSON:\n" + char_str + "\n" + hexx_str + "\n",
                  e)
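
A minimal usage sketch; the {{...}} placeholder and |quote filter follow the expand_template conventions used throughout, and the field names are assumptions for illustration:

raw = u'''
{
    "name": {{name|quote}},  // line comments are stripped when flexible=True
    "details.count": 3,
}
'''
value = json2value(raw, params={"name": "example"}, flexible=True, leaves=True)
# flexible=True removes the comment and the trailing comma, params expands
# {{name|quote}} to "example", and leaves=True nests the dot-delimited key:
# value == {"name": "example", "details": {"count": 3}}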
Example #54
0
def message(self):
    return expand_template(self.template, self.params)
Example #55
0
def format_trace(tbs, start=0):
    trace = []
    for d in tbs[start::]:
        item = expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d)
        trace.append(item)
    return "".join(trace)
Example #56
0
def DataClass(name, columns, constraint=None):
    """
    Use the DataClass to define a class, but with some extra features:
    1. restrict the datatype of property
    2. restrict if `required`, or if `nulls` are allowed
    3. generic constraints on object properties

    It is expected that this class become a real class (or be removed) in the
    long term because it is expensive to use and should only be good for
    verifying program correctness, not user input.

    :param name: Name of the class we are creating
    :param columns: Each columns[i] has properties {
            "name",     - (required) name of the property
            "required", - False if it must be defined (even if None)
            "nulls",    - True if property can be None, or missing
            "default",  - A default value, if none is provided
            "type"      - a Python datatype
        }
    :param constraint: a JSON query Expression for extra constraints (return true if all constraints are met)
    :return: The class that has been created
    """

    columns = wrap([{
        "name": c,
        "required": True,
        "nulls": False,
        "type": object
    } if is_text(c) else c for c in columns])
    slots = columns.name
    required = wrap(
        filter(lambda c: c.required and not c.nulls and not c.default,
               columns)).name
    nulls = wrap(filter(lambda c: c.nulls, columns)).name
    defaults = {c.name: coalesce(c.default, None) for c in columns}
    types = {c.name: coalesce(c.jx_type, object) for c in columns}

    code = expand_template(
        """
from __future__ import unicode_literals
from mo_future import is_text, is_binary
from collections import Mapping

meta = None
types_ = {{types}}
defaults_ = {{defaults}}

class {{class_name}}(Mapping):
    __slots__ = {{slots}}


    def _constraint(row, rownum, rows):
        try:
            return {{constraint_expr}}
        except Exception as e:
            Log.error(
                "constraint\\n{" + "{code}}\\nnot satisfied {" + "{expect}}\\n{" + "{value|indent}}",
                code={{constraint_expr|quote}}, 
                expect={{constraint}}, 
                value=row,
                cause=e
            )

    def __init__(self, **kwargs):
        if not kwargs:
            return

        for s in {{slots}}:
            object.__setattr__(self, s, kwargs.get(s, {{defaults}}.get(s, None)))

        missed = {{required}}-set(kwargs.keys())
        if missed:
            Log.error("Expecting properties {"+"{missed}}", missed=missed)

        illegal = set(kwargs.keys())-set({{slots}})
        if illegal:
            Log.error("{"+"{names}} are not a valid properties", names=illegal)

        self._constraint(0, [self])

    def __getitem__(self, item):
        return getattr(self, item)

    def __setitem__(self, item, value):
        setattr(self, item, value)
        return self

    def __setattr__(self, item, value):
        if item not in {{slots}}:
            Log.error("{"+"{item|quote}} not valid attribute", item=item)
        object.__setattr__(self, item, value)
        self._constraint(0, [self])

    def __getattr__(self, item):
        Log.error("{"+"{item|quote}} not valid attribute", item=item)

    def __hash__(self):
        return object.__hash__(self)

    def __eq__(self, other):
        if isinstance(other, {{class_name}}) and dict(self)==dict(other) and self is not other:
            Log.error("expecting to be same object")
        return self is other

    def __dict__(self):
        return {k: getattr(self, k) for k in {{slots}}}

    def items(self):
        return ((k, getattr(self, k)) for k in {{slots}})

    def __copy__(self):
        _set = object.__setattr__
        output = object.__new__({{class_name}})
        {{assign}}
        return output

    def __iter__(self):
        return {{slots}}.__iter__()

    def __len__(self):
        return {{len_slots}}

    def __str__(self):
        return str({{dict}})

""",
        {
            "class_name": name,
            "slots": "(" + (", ".join(quote(s) for s in slots)) + ")",
            "required": "{" + (", ".join(quote(s) for s in required)) + "}",
            "nulls": "{" + (", ".join(quote(s) for s in nulls)) + "}",
            "defaults": Literal(defaults).to_python(),
            "len_slots": len(slots),
            "dict": "{" + (", ".join(quote(s) + ": self." + s for s in slots)) + "}",
            "assign": "; ".join("_set(output, " + quote(s) + ", self." + s + ")" for s in slots),
            "types": "{" + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items())) + "}",
            "constraint_expr": Python[jx_expression(not ENABLE_CONSTRAINTS or constraint)].to_python(),
            "constraint": value2json(constraint),
        },
    )

    output = _exec(code, name)
    register_data(output)
    return output
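
A minimal usage sketch; the class and property names are illustrative, and the behavior follows the normalization above (plain-string columns become required, non-null properties of type object):

Point = DataClass("Point", ["x", "y"])

p = Point(x=1, y=2)  # omitting a required property raises "Expecting properties ..."
p.x                  # attribute access through __slots__
p["y"]               # Mapping access through __getitem__
# p.z = 3            # would raise: "z" is not a declared slot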
Example #57
0
def format_trace(tbs, start=0):
    return "".join(
        expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d)
        for d in tbs[start::]
    )
Example #58
0
def _get_source_code_from_hg(self, revision, file_path):
    response = http.get(expand_template(FILE_URL, {"location": revision.branch.url, "rev": revision.changeset.id, "path": file_path}))
    return response.content.decode("utf8", "replace")
Example #59
0
def get_dataum(db_config, signature_id):
    db = MySQL(db_config)
    with db:
        return db.query(expand_template(datum_sql, quote_list(listwrap(signature_id))))