Пример #1
0
    def _make_range_domain(self, domain, column_name):
        """
        Compose a SELECT over the `__digits__` table that yields
        `value * interval + min` for every `value` below the domain width.
        One `__digits__` alias (a, b, c, ...) is joined per decimal digit.

        :param domain: object exposing numeric `min`, `max` and `interval`
        :param column_name: appended directly after the selected expression
        :return: the composed SQL statement
        """
        width = (domain.max - domain.min) / domain.interval
        digits = mo_math.floor(mo_math.log10(width - 1))

        def alias(position):
            # ONE ALIAS LETTER PER DECIMAL DIGIT: a, b, c, ...
            return text_type(chr(ord(b'a') + position))

        if digits == 0:
            value = "a.value"
        else:
            # 1*a.value+10*b.value+100*c.value ...
            value = SQL("+").join(
                "1" + ("0" * j) + "*" + alias(j) + ".value"
                for j in range(digits + 1))

        if domain.interval == 1:
            if domain.min == 0:
                domain = (SQL_SELECT + value + column_name + SQL_FROM +
                          "__digits__ a")
            else:
                domain = (SQL_SELECT + sql_iso(value) + " + " +
                          quote_value(domain.min) + column_name + SQL_FROM +
                          "__digits__ a")
        else:
            if domain.min == 0:
                domain = (SQL_SELECT + value + " * " +
                          quote_value(domain.interval) + column_name +
                          SQL_FROM + "__digits__ a")
            else:
                domain = (
                    SQL_SELECT +
                    sql_iso(value + " * " + quote_value(domain.interval)) +
                    " + " + quote_value(domain.min) + column_name + SQL_FROM +
                    "__digits__ a")

        for j in range(digits):
            # THE SPACE SEPARATES THE TABLE NAME FROM ITS ALIAS; WITHOUT IT
            # THE JOIN WOULD REFER TO A TABLE CALLED `__digits__b`
            domain += (SQL_INNER_JOIN + "__digits__ " + alias(j + 1) +
                       " ON " + SQL_TRUE)
        domain += SQL_WHERE + value + " < " + quote_value(width)
        return domain
Пример #2
0
    def _make_range_domain(self, domain, column_name):
        """
        Compose a SELECT over the `__digits__` table that yields
        `value * interval + min` for every `value` below the domain width.
        One `__digits__` alias (a, b, c, ...) is joined per decimal digit.

        :param domain: object exposing numeric `min`, `max` and `interval`
        :param column_name: name given to the selected expression
        :return: the composed SQL text
        """
        width = (domain.max - domain.min) / domain.interval
        digits = Math.floor(Math.log10(width - 1))

        def alias(position):
            # ONE ALIAS LETTER PER DECIMAL DIGIT: a, b, c, ...
            return text_type(chr(ord(b'a') + position))

        if digits == 0:
            value = "a.value"
        else:
            weighted = [
                "1" + ("0" * j) + "*" + alias(j) + ".value"
                for j in range(digits + 1)
            ]
            value = "+".join(weighted)

        # SCALE BY THE INTERVAL ONLY WHEN IT IS NOT THE IDENTITY
        if domain.interval == 1:
            scaled = value
        else:
            scaled = value + " * " + quote_value(domain.interval)

        # SHIFT BY THE MINIMUM ONLY WHEN IT IS NOT ZERO
        if domain.min == 0:
            expression = scaled
        else:
            expression = "(" + scaled + ") + " + quote_value(domain.min)

        statement = "SELECT " + expression + " " + column_name + \
                    "\nFROM __digits__ a"

        for position in range(1, digits + 1):
            statement += "\nJOIN __digits__ " + alias(position) + " ON 1=1"
        statement += "\nWHERE " + value + " < " + quote_value(width)
        return statement
Пример #3
0
 def _db_insert_column(self, column):
     """
     Insert one record, built from `column`, into the metadata table.
     When the database reports a uniqueness violation, an UPDATE for the
     same column is queued on `self.todo` instead; any other failure is
     reported through `Log.error`.

     :param column: indexable by each name in METADATA_COLUMNS
     """
     json_encoded = ("nested_path", "partitions")
     try:
         values = []
         for c in METADATA_COLUMNS:
             cell = column[c.name]
             if c.name in json_encoded:
                 # THESE FIELDS ARE STORED AS JSON TEXT
                 cell = value2json(cell)
             values.append(quote_value(cell))

         statement = (
             "INSERT INTO"
             + db_table_name
             + sql_iso(all_columns)
             + "VALUES"
             + sql_iso(sql_list(values))
         )
         self.db.execute(statement)
     except Exception as e:
         e = Except.wrap(e)
         duplicate = "UNIQUE constraint failed" in e or " are not unique" in e
         if not duplicate:
             Log.error("do not know how to handle", cause=e)
         else:
             # THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
             self.todo.add((UPDATE, column), force=True)
Пример #4
0
    def request(self, method, path, headers):
        """
        Answer a request for `path`, trying in order: the in-memory cache,
        the `cache` database table, and finally the `self.todo` queue
        (after which this call blocks until the signal for `path` fires).

        :param method: forwarded on the queued work item
        :param path: cache key
        :param headers: forwarded on the queued work item
        :return: a `Response` with status 200
        """
        now = Date.now()
        self.inbound_rate.add(now)
        ready = Signal(path)

        def touch():
            # MOVE THE STORED TIMESTAMP FORWARD TO now (NEVER BACKWARD)
            with self.db.transaction() as t:
                t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))

        def reply(body, raw_headers):
            return Response(
                body,
                status=200,
                headers=json.loads(raw_headers)
            )

        # TEST CACHE; CLAIM THE SLOT WITH A PLACEHOLDER IF NOBODY HAS IT
        with self.cache_locker:
            pair = self.cache.get(path)
            if pair is None:
                self.cache[path] = (ready, None, None, now)

        if pair is not None:
            # REQUEST IS IN THE QUEUE ALREADY, WAIT
            ready, headers, response, _ = pair
            if response is None:
                ready.wait()
                with self.cache_locker:
                    ready, headers, response, _ = self.cache.get(path)
            touch()
            return reply(response, headers)

        # TEST DB
        rows = self.db.query("SELECT headers, response FROM cache WHERE path=" + quote_value(path)).data
        if rows:
            headers, response = rows[0]
            touch()
            with self.cache_locker:
                self.cache[path] = (ready, headers, response.encode('latin1'), now)
            ready.go()
            return reply(response, headers)

        # MAKE A NETWORK REQUEST
        self.todo.add((ready, method, path, headers, now))
        ready.wait()
        with self.cache_locker:
            ready, headers, response, _ = self.cache[path]
        return reply(response, headers)
Пример #5
0
 def to_sql(self, schema, not_null=False, boolean=False):
     """
     Render this literal as a one-element wrapped list whose `sql` entry
     is keyed by type: "s" (text), "n" (number), "b" (boolean) or
     "j" (anything else, quoted from `self.json`).
     A null value produces an entry with no `sql` at all.
     """
     literal = self.value
     # `==` RATHER THAN `is` - presumably so null-like wrappers also match
     if literal == None:
         return wrap([{"name": "."}])

     # ORDER MATTERS: TEXT, THEN NUMBER, THEN BOOLEAN, THEN EVERYTHING ELSE
     if isinstance(literal, text):
         type_key, payload = "s", literal
     elif is_number(literal):
         type_key, payload = "n", literal
     elif literal in [True, False]:
         type_key, payload = "b", literal
     else:
         type_key, payload = "j", self.json

     return wrap([{"name": ".", "sql": {type_key: quote_value(payload)}}])
Пример #6
0
    def request(self, method, path, headers):
        """
        Answer a request for `path`, trying in order: the in-memory cache,
        the `cache` database table, and finally the `self.todo` queue
        (after which this call blocks until the signal for `path` fires).

        :param method: forwarded on the queued work item
        :param path: cache key
        :param headers: forwarded on the queued work item
        :return: a `Response` with status 200
        """
        now = Date.now()
        self.inbound_rate.add(now)
        ready = Signal(path)

        def touch():
            # MOVE THE STORED TIMESTAMP FORWARD TO now (NEVER BACKWARD)
            with self.db.transaction() as t:
                t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))

        def reply(body, raw_headers):
            return Response(
                body,
                status=200,
                headers=json.loads(raw_headers)
            )

        # TEST CACHE; CLAIM THE SLOT WITH A PLACEHOLDER IF NOBODY HAS IT
        with self.cache_locker:
            pair = self.cache.get(path)
            if pair is None:
                self.cache[path] = (ready, None, None, now)

        if pair is not None:
            # REQUEST IS IN THE QUEUE ALREADY, WAIT
            ready, headers, response, _ = pair
            if response is None:
                ready.wait()
                with self.cache_locker:
                    ready, headers, response, _ = self.cache.get(path)
            touch()
            return reply(response, headers)

        # TEST DB
        rows = self.db.query("SELECT headers, response FROM cache WHERE path=" + quote_value(path)).data
        if rows:
            headers, response = rows[0]
            touch()
            with self.cache_locker:
                self.cache[path] = (ready, headers, response.encode('latin1'), now)
            ready.go()
            return reply(response, headers)

        # MAKE A NETWORK REQUEST
        self.todo.add((ready, method, path, headers, now))
        ready.wait()
        with self.cache_locker:
            ready, headers, response, _ = self.cache[path]
        return reply(response, headers)
Пример #7
0
    def to_sql(self, schema, not_null=False, boolean=False):
        """
        Render the concatenation of `self.terms`, each prefixed with the
        separator, then strip the leading separator with `substr`.
        With no terms the SQL of `self.default` is returned as-is; when the
        whole expression can be missing, the result is wrapped in a CASE
        that falls back to the default.
        """
        fallback = self.default.to_sql(schema)
        if len(self.terms) == 0:
            return fallback
        fallback = coalesce(fallback[0].sql, SQL_NULL)
        sep = self.separator.to_sql(schema)[0].sql.s

        def as_text(term):
            # PICK THE STRING FORM, ELSE CAST THE NUMBER, ELSE SPELL THE BOOLEAN
            if term.s:
                return term.s
            if term.n:
                return "cast(" + term.n + " as text)"
            return (SQL_CASE + SQL_WHEN + term.b + SQL_THEN +
                    quote_value("true") + SQL_ELSE +
                    quote_value("false") + SQL_END)

        pieces = []
        for t in self.terms:
            absent = t.missing().partial_eval()
            term_sql = as_text(t.to_sql(schema, not_null=True)[0].sql)

            if isinstance(absent, TrueOp):
                # ALWAYS MISSING: CONTRIBUTES NOTHING
                pieces.append(SQL_EMPTY_STRING)
            elif absent:
                # SOMETIMES MISSING: DECIDE PER ROW
                pieces.append(
                    SQL_CASE + SQL_WHEN +
                    sql_iso(absent.to_sql(schema, boolean=True)[0].sql.b) +
                    SQL_THEN + SQL_EMPTY_STRING + SQL_ELSE +
                    sql_iso(sql_concat([sep, term_sql])) + SQL_END)
            else:
                pieces.append(sql_concat([sep, term_sql]))

        joined = sql_concat(pieces)
        sep_length = LengthOp(None, self.separator).to_sql(schema)[0].sql.n
        # EVERY PIECE STARTS WITH THE SEPARATOR, SO SKIP THE FIRST ONE
        expr_ = "substr(" + joined + ", " + sep_length + "+1)"

        all_missing = self.missing()
        if not all_missing:
            return wrap([{"name": ".", "sql": {"s": expr_}}])

        test = all_missing.to_sql(schema, boolean=True)[0].sql.b
        guarded = (SQL_CASE + SQL_WHEN + "(" + test + ")" +
                   SQL_THEN + "(" + fallback + ")" + SQL_ELSE + "(" + expr_ +
                   ")" + SQL_END)
        return wrap([{"name": ".", "sql": {"s": guarded}}])
Пример #8
0
 def to_sql(self, schema, not_null=False, boolean=False):
     """
     Render the length of `self.term` as a numeric ("n") SQL expression.
     Literal text and numbers are measured here and emitted as constants
     (numbers by the length of their JSON form); any other literal gives
     Null. Non-literals become a `LENGTH(...)` call on their string SQL.
     """
     term = SQLang[self.term].partial_eval()
     if not is_literal(term):
         text_sql = term.to_sql(schema, not_null=not_null)[0].sql.s
         length = ConcatSQL((SQL("LENGTH"), sql_iso(text_sql)))
     else:
         constant = term.value
         if isinstance(constant, text):
             length = quote_value(len(constant))
         elif isinstance(constant, (float, int)):
             length = quote_value(len(convert.value2json(constant)))
         else:
             return Null
     return wrap([{"name": ".", "sql": {"n": length}}])
Пример #9
0
    def _insert(self, collection):
        """
        Write each group of rows in `collection` to its table using a single
        `INSERT INTO ... SELECT ... UNION ALL SELECT ...` statement, one
        transaction per table.

        :param collection: maps nested path -> details, where details has
                           `active_columns` and `rows`
        """
        for nested_path, details in collection.items():
            active_columns = wrap(list(details.active_columns))
            rows = details.rows
            fact_name = self.facts.snowflake.fact_name
            table_name = concat_field(fact_name, nested_path)

            # THE FACT TABLE DOES NOT REQUIRE PARENT OR ORDER COLUMNS
            is_fact_table = table_name == fact_name
            meta_columns = [GUID, UID] if is_fact_table else [UID, PARENT, ORDER]

            all_columns = meta_columns + active_columns.es_column

            header = ("INSERT INTO " + quote_column(table_name) +
                      sql_iso(sql_list(map(quote_column, all_columns))))

            # ONE SELECT PER RECORD
            selects = [
                SQL_SELECT +
                sql_list(quote_value(row.get(c)) for c in all_columns)
                for row in unwrap(rows)
            ]
            body = SQL_UNION_ALL.join(selects)

            with self.db.transaction() as t:
                t.execute(header + body)
Пример #10
0
    def _insert(self, collection):
        """
        Write each group of rows in `collection` to its table using a single
        multi-row `INSERT ... VALUES` statement, one transaction per table.
        Groups with no rows are skipped, because `VALUES` followed by no
        tuples is not a valid statement.

        :param collection: maps nested path -> details, where details has
                           `active_columns` and `rows`
        """
        for nested_path, details in collection.items():
            rows = details.rows
            if len(rows) == 0:
                # NOTHING TO WRITE FOR THIS TABLE
                continue

            active_columns = wrap(list(details.active_columns))
            table_name = concat_field(self.name, nested_path)

            if table_name == self.name:
                # DO NOT REQUIRE PARENT OR ORDER COLUMNS
                meta_columns = [GUID, UID]
            else:
                meta_columns = [UID, PARENT, ORDER]

            all_columns = meta_columns + active_columns.es_column  # ONLY THE PRIMITIVE VALUE COLUMNS
            command = ConcatSQL([
                SQL_INSERT,
                quote_column(table_name),
                sql_iso(sql_list(map(quote_column, all_columns))), SQL_VALUES,
                sql_list(
                    sql_iso(
                        sql_list(quote_value(row.get(c)) for c in all_columns))
                    for row in unwrap(rows))
            ])

            with self.db.transaction() as t:
                t.execute(command)
Пример #11
0
 def _make_digits_table(self):
     """
     Create the `__digits__` table and fill it with the values 0..9,
     unless `PRAGMA table_info` already returns data for it.
     """
     table_info = self.db.query("PRAGMA table_info(__digits__)")
     if table_info.data:
         # TABLE ALREADY EXISTS
         return

     self.db.execute("CREATE TABLE __digits__(value INTEGER)")
     ten_rows = SQL_UNION_ALL.join(SQL_SELECT + SQL(quote_value(i))
                                   for i in range(10))
     self.db.execute("INSERT INTO __digits__ " + ten_rows)
Пример #12
0
    def _db_load(self):
        """
        Record the load time, then either create the metadata table (when
        `sqlite_master` has no entry for it) or read every stored row and
        register it through `self._add` while holding `self.locker`.
        """
        self.last_load = Date.now()

        # DOES THE TABLE EXIST?
        conditions = ["name=" + db_table_name, "type=" + quote_value("table")]
        found = self._query(
            SQL_SELECT
            + "name"
            + SQL_FROM
            + "sqlite_master"
            + SQL_WHERE
            + SQL_AND.join(conditions)
        )
        if not found.data:
            self._db_create()
            return

        ordering = sql_list(map(quote_column, ["es_index", "name", "es_column"]))
        stored = self._query(
            SQL_SELECT + all_columns + SQL_FROM + db_table_name + SQL_ORDERBY + ordering
        )

        with self.locker:
            for row in stored.data:
                self._add(row_to_column(stored.header, row))
Пример #13
0
    def _edges_op(self, query, frum):
        """
        Begin composing the SQL for a query with edges: assign an alias to
        every table in `self.sf.tables`, build the FROM clause that left-joins
        the nested tables along the path of `frum`, compile the WHERE filter,
        and compute `edge_values` for each edge.

        NOTE(review): only the opening portion of this method is visible in
        this excerpt; how the accumulators declared below are consumed, and
        what is returned, cannot be confirmed from here.

        :param query: query object; it is copied before being modified
        :param frum: dotted name of the table being queried
        """
        query = query.copy()  # WE WILL BE MARKING UP THE QUERY
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        outer_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        frum_path = split_field(frum)
        base_table = join_field(frum_path[0:1])
        path = join_field(frum_path[1:])
        # ONE ALIAS PER TABLE: __a__, __b__, __c__, ...
        nest_to_alias = {
            nested_path: quote_column("__" + unichr(ord('a') + i) + "__")
            for i, (nested_path, sub_table) in enumerate(self.sf.tables.items())
        }

        schema = self.sf.tables[relative_field(frum, self.sf.fact)].schema

        # KEEP ONLY THE TABLES THAT ARE A PREFIX OF path, SHORTEST NEST FIRST
        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        # JOIN EACH TABLE TO THE PREVIOUS ONE: child.PARENT = parent.UID
        from_sql = quote_column(join_field([base_table] + split_field(tables[0].nest))) + tables[0].alias
        for previous, t in zip(tables, tables[1::]):
            from_sql += (
                SQL_LEFT_JOIN + quote_column(concat_field(base_table, t.nest)) + t.alias +
                SQL_ON + join_column(t.alias, quoted_PARENT) + " = " + join_column(previous.alias, quoted_UID)
            )

        main_filter = query.where.to_sql(schema, boolean=True)[0].sql.b

        # SHIFT THE COLUMN DEFINITIONS BASED ON THE NESTED QUERY DEPTH
        ons = []
        join_types = []
        wheres = []
        null_ons = [EXISTS_COLUMN + SQL_IS_NULL]
        groupby = []
        null_groupby = []
        orderby = []
        domains = []

        select_clause = [SQL_ONE + EXISTS_COLUMN] + [quote_column(c.es_column) for c in self.sf.tables['.'].columns]

        for edge_index, query_edge in enumerate(query.edges):
            edge_alias = quote_column("e" + text_type(edge_index))

            if query_edge.value:
                # EDGE DEFINED BY AN EXPRESSION: COLLECT EVERY (type, sql) PAIR
                edge_values = [p for c in query_edge.value.to_sql(schema).sql for p in c.items()]

            elif not query_edge.value and any(query_edge.domain.partitions.where):
                # EDGE DEFINED BY PARTITION FILTERS: CASE MAPS EACH ROW TO ITS PARTITION INDEX
                case = SQL_CASE
                for pp, p in enumerate(query_edge.domain.partitions):
                    w = p.where.to_sql(schema)[0].sql.b
                    t = quote_value(pp)
                    case += SQL_WHEN + w + SQL_THEN + t
                case += SQL_ELSE + SQL_NULL + SQL_END  # quote value with length of partitions
                edge_values = [("n", case)]

            elif query_edge.range:
                # EDGE DEFINED BY A RANGE: PAIRS FOR min FOLLOWED BY PAIRS FOR max
                edge_values = query_edge.range.min.to_sql(schema)[0].sql.items() + query_edge.range.max.to_sql(schema)[
                    0].sql.items()
Пример #14
0
    def to_sql(self, schema, not_null=False, boolean=False):
        """
        Render the concatenation of `self.terms`, each prefixed with the
        separator, then strip the leading separator with `SUBSTR`.
        With no terms the SQL of `self.default` is returned as-is; otherwise
        a STRING `SQLScript` is returned whose `miss` is `self.missing()`.
        """
        fallback = self.default.to_sql(schema)
        if len(self.terms) == 0:
            return fallback
        # NOTE(review): this value is not used below - confirm whether the
        # default was meant to reach the returned script
        fallback = coalesce(fallback[0].sql.s, SQL_NULL)
        sep = SQLang[self.separator].to_sql(schema)[0].sql.s

        def as_text(term):
            # PICK THE STRING FORM, ELSE CAST THE NUMBER, ELSE SPELL THE BOOLEAN
            if term.s:
                return term.s
            if term.n:
                return "cast(" + term.n + " as text)"
            return (SQL_CASE + SQL_WHEN + term.b + SQL_THEN +
                    quote_value("true") + SQL_ELSE +
                    quote_value("false") + SQL_END)

        pieces = []
        for t in self.terms:
            t = SQLang[t]
            absent = t.missing().partial_eval()
            term_sql = as_text(t.to_sql(schema, not_null=True)[0].sql)

            if isinstance(absent, TrueOp):
                # ALWAYS MISSING: CONTRIBUTES NOTHING
                pieces.append(SQL_EMPTY_STRING)
            elif absent:
                # SOMETIMES MISSING: DECIDE PER ROW
                pieces.append(
                    SQL_CASE + SQL_WHEN +
                    sql_iso(absent.to_sql(schema, boolean=True)[0].sql.b) +
                    SQL_THEN + SQL_EMPTY_STRING + SQL_ELSE +
                    sql_iso(sql_concat_text([sep, term_sql])) + SQL_END)
            else:
                pieces.append(sql_concat_text([sep, term_sql]))

        joined = sql_concat_text(pieces)
        # EVERY PIECE STARTS WITH THE SEPARATOR, SO START AFTER THE FIRST ONE
        start = LengthOp(self.separator).to_sql(schema)[0].sql.n + SQL("+1")
        substring = "SUBSTR" + sql_iso(sql_list([joined, start]))

        return SQLScript(expr=substring,
                         data_type=STRING,
                         frum=self,
                         miss=self.missing(),
                         many=False,
                         schema=schema)
Пример #15
0
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Render membership of `self.value` in a literal superset as a chain of
    `==` comparisons joined with OR. An empty superset renders as a
    constant-false boolean entry. Non-literal supersets are reported
    through `Log.error`.

    NOTE(review): the non-empty branch returns the joined SQL directly,
    while the empty branch returns a wrapped list - confirm callers expect
    both shapes.
    """
    if not isinstance(self.superset, Literal):
        Log.error("Not supported")

    candidates = json2value(self.superset.json)
    if not candidates:
        return wrap([{"name": ".", "sql": {"b": SQL_FALSE}}])

    lhs = self.value.to_sql(schema)
    comparisons = [sql_iso(lhs + "==" + quote_value(v)) for v in candidates]
    return SQL_OR.join(comparisons)
Пример #16
0
def _work(name, db, sigs, please_stop):
    """
    Run one transaction in three externally-paced steps: open it, insert
    `name` into `my_table`, then read the row back and check it.
    Each step waits on `sigs[i].begin` and announces completion with
    `sigs[i].done`.

    :param name: value inserted and then queried
    :param db: provides `transaction()`
    :param sigs: indexable; items 0..2 each expose `begin` and `done`
    :param please_stop: not used in this function
    """
    try:
        # STEP 0: OPEN THE TRANSACTION
        sigs[0].begin.wait()
        with db.transaction() as t:
            sigs[0].done.go()
            # STEP 1: INSERT
            sigs[1].begin.wait()
            t.execute("INSERT INTO my_table VALUES " + sql_iso(quote_value(name)))
            sigs[1].done.go()

            # STEP 2: READ BACK INSIDE THE SAME TRANSACTION
            sigs[2].begin.wait()
            result = t.query("SELECT * FROM my_table WHERE value=" + quote_value(name))
            assert len(result.data) == 1
            assert result.data[0][0] == name
        # SIGNALLED ONLY AFTER THE TRANSACTION BLOCK HAS EXITED
        sigs[2].done.go()
    finally:
        # RELEASE ALL SIGNALS, THIS IS ENDING BADLY
        for s in sigs:
            s.done.go()
Пример #17
0
 def to_sql(self, schema, not_null=False, boolean=False):
     """
     Render `<string sql of self.var> REGEXP <quoted pattern>` as a
     boolean ("b") SQL entry. The pattern is decoded from
     `self.pattern.json` before being quoted.
     """
     regex_sql = quote_value(json2value(self.pattern.json))
     subject_sql = self.var.to_sql(schema)[0].sql.s
     test = subject_sql + " REGEXP " + regex_sql
     return wrap([{"name": ".", "sql": {"b": test}}])
Пример #18
0
def _work(name, db, sigs, please_stop):
    """
    Run one transaction in three externally-paced steps: open it, insert
    `name` into `my_table`, then read the row back and check it.
    Each step waits on `sigs[i].begin` and announces completion with
    `sigs[i].done`.

    :param name: value inserted and then queried
    :param db: provides `transaction()`
    :param sigs: indexable; items 0..2 each expose `begin` and `done`
    :param please_stop: not used in this function
    """
    try:
        # STEP 0: OPEN THE TRANSACTION
        sigs[0].begin.wait()
        with db.transaction() as t:
            sigs[0].done.go()
            # STEP 1: INSERT
            sigs[1].begin.wait()
            t.execute("INSERT INTO my_table VALUES " +
                      sql_iso(quote_value(name)))
            sigs[1].done.go()

            # STEP 2: READ BACK INSIDE THE SAME TRANSACTION
            sigs[2].begin.wait()
            result = t.query("SELECT * FROM my_table WHERE value=" +
                             quote_value(name))
            assert len(result.data) == 1
            assert result.data[0][0] == name
        # SIGNALLED ONLY AFTER THE TRANSACTION BLOCK HAS EXITED
        sigs[2].done.go()
    finally:
        # RELEASE ALL SIGNALS, THIS IS ENDING BADLY
        for s in sigs:
            s.done.go()
Пример #19
0
 def _make_digits_table(self):
     """
     Create the digits table and fill it with the values 0..9 inside one
     transaction, unless `PRAGMA table_info(__digits__)` already returns
     data.
     """
     if self.db.query("PRAGMA table_info(__digits__)").data:
         # TABLE ALREADY EXISTS
         return

     with self.db.transaction() as t:
         create = "CREATE TABLE" + quote_column(DIGITS_TABLE) + "(value INTEGER)"
         t.execute(create)

         fill = (
             "INSERT INTO"
             + quote_column(DIGITS_TABLE)
             + SQL_UNION_ALL.join(SQL_SELECT + quote_value(i) for i in range(10))
         )
         t.execute(fill)
Пример #20
0
 def to_sql(self, schema, not_null=False, boolean=False):
     """
     Render `self.term` as a string ("s") SQL entry. Each typed form of
     the term is converted to text: booleans become 'true'/'false' (NULL
     when the term is missing), strings pass through, and anything else
     is cast to TEXT with trailing "0" and then "." trimmed. Several forms
     are combined with `sql_coalesce`; none gives an empty entry.
     """
     is_missing = self.term.missing().to_sql(schema, boolean=True)[0].sql.b
     typed = self.term.to_sql(schema, not_null=True)[0].sql

     def as_string(kind, sql):
         if kind == "b":
             return (SQL_CASE + SQL_WHEN + sql_iso(is_missing) + SQL_THEN +
                     SQL_NULL + SQL_WHEN + sql_iso(sql) + SQL_THEN +
                     "'true'" + SQL_ELSE + "'false'" + SQL_END)
         if kind == "s":
             return sql
         # THE FIRST sql_iso CLOSES OVER BOTH THE CAST AND THE INNER RTRIM
         return ("RTRIM(RTRIM(CAST" +
                 sql_iso(sql + " as TEXT), " + quote_value("0")) +
                 ", " + quote_value(".") + ")")

     candidates = [as_string(kind, sql) for kind, sql in typed.items()]

     if not candidates:
         return wrap([{}])
     if len(candidates) == 1:
         string_sql = candidates[0]
     else:
         string_sql = sql_coalesce(candidates)
     return wrap([{"name": ".", "sql": {"s": string_sql}}])
Пример #21
0
    def _insert(self, collection):
        """
        Write each group of rows in `collection` to its table using a single
        `INSERT INTO ... SELECT ... UNION ALL SELECT ...` statement.

        :param collection: maps nested path -> details, where details has
                           `active_columns` and `rows`
        """
        for nested_path, details in collection.items():
            active_columns = wrap(list(details.active_columns))
            rows = details.rows
            table_name = concat_field(self.sf.fact, nested_path)

            # THE FACT TABLE DOES NOT REQUIRE PARENT OR ORDER COLUMNS
            is_fact_table = table_name == self.sf.fact
            meta_columns = [GUID, UID] if is_fact_table else [UID, PARENT, ORDER]

            all_columns = meta_columns + active_columns.es_column

            header = "INSERT INTO " + quote_table(table_name) + \
                     "(" + ",".join(map(quote_table, all_columns)) + ")"

            # ONE SELECT PER RECORD
            selects = []
            for row in unwrap(rows):
                cells = ",".join(quote_value(row.get(c)) for c in all_columns)
                selects.append("\nSELECT " + cells)
            body = " UNION ALL ".join(selects)

            self.db.execute(header + body)
Пример #22
0
    def update(self, command):
        """
        Apply a set/clear command to the rows matching `command.where`.

        Steps, in order: reject updates that touch nested (deep) columns,
        register a column for every name in `set` that is not known yet,
        replace the child rows of every nested value being set (delete, then
        insert), and finally issue one UPDATE for the primitive columns.

        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        """
        command = wrap(command)

        # REJECT DEEP UPDATES
        touched_columns = command.set.keys() | set(listwrap(command['clear']))
        for c in self.schema.columns:
            if c.name in touched_columns and len(c.nested_path) > 1:
                Log.error("Deep update not supported")

        # ADD NEW COLUMNS
        where = jx_expression(command.where)
        _vars = where.vars()
        # MAP EACH VARIABLE IN THE FILTER TO THE es_column OF A NON-STRUCT COLUMN
        _map = {
            v: c.es_column
            for v in _vars for c in self.columns.get(v, Null)
            if c.jx_type not in STRUCT
        }
        where_sql = where.map(_map).to_sql(self.schema)
        new_columns = set(command.set.keys()) - set(self.columns.keys())
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_type(nested_value)
            column = Column(name=new_column_name,
                            jx_type=ctype,
                            es_index=self.name,
                            es_type=json_type_to_sqlite_type(ctype),
                            es_column=typed_column(new_column_name, ctype),
                            last_updated=Date.now())
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_type(nested_value) == "nested":
                nested_table_name = concat_field(self.name, nested_column_name)
                # NOTE(review): nested_tables is not defined in this method - confirm where it comes from
                nested_table = nested_tables[nested_column_name]
                self_primary_key = sql_list(
                    quote_column(c.es_column) for u in self.uid
                    for c in self.columns[u])
                extra_key_name = UID + text(len(self.uid))
                extra_key = [e
                             for e in nested_table.columns[extra_key_name]][0]

                # DELETE THE EXISTING CHILD ROWS OF EVERY PARENT MATCHING THE FILTER
                # NOTE(review): abs_schema is not defined in this method - confirm where it comes from
                sql_command = (
                    SQL_DELETE + SQL_FROM + quote_column(nested_table.name) +
                    SQL_WHERE + "EXISTS" +
                    sql_iso(SQL_SELECT + SQL_ONE + SQL_FROM +
                            sql_alias(quote_column(nested_table.name), "n") +
                            SQL_INNER_JOIN +
                            sql_iso(SQL_SELECT + self_primary_key + SQL_FROM +
                                    quote_column(abs_schema.fact) + SQL_WHERE +
                                    where_sql) + " t ON " +
                            SQL_AND.join(
                                quote_column("t", c.es_column) + SQL_EQ +
                                quote_column("n", c.es_column)
                                for u in self.uid for c in self.columns[u])))
                self.db.execute(sql_command)

                # INSERT NEW RECORDS
                if not nested_value:
                    continue

                doc_collection = {}
                for d in listwrap(nested_value):
                    nested_table.flatten(d,
                                         Data(),
                                         doc_collection,
                                         path=nested_column_name)

                prefix = SQL_INSERT + quote_column(nested_table.name) + sql_iso(
                    sql_list([self_primary_key] + [quote_column(extra_key)] + [
                        quote_column(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns
                    ]))

                # BUILD THE PARENT TABLES
                # NOTE(review): `schema` is not defined in this method (self.schema is used above) - confirm
                parent = (SQL_SELECT + self_primary_key + SQL_FROM +
                          quote_column(abs_schema.fact) + SQL_WHERE +
                          jx_expression(command.where).to_sql(schema))

                # BUILD THE RECORDS
                children = SQL_UNION_ALL.join(
                    SQL_SELECT + quote_value(i) + " " +
                    quote_column(extra_key.es_column) + "," + sql_list(
                        quote_value(row[c.name]) + " " +
                        quote_column(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns)
                    for i, row in enumerate(
                        doc_collection.get(".", Null).rows))

                # EVERY MATCHING PARENT (p) IS PAIRED WITH EVERY NEW CHILD (c)
                sql_command = (prefix + SQL_SELECT + sql_list([
                    quote_column("p", c.es_column) for u in self.uid
                    for c in self.columns[u]
                ] + [quote_column("c", extra_key)] + [
                    quote_column("c", c.es_column)
                    for c in doc_collection.get(".", Null).active_columns
                ]) + SQL_FROM + sql_iso(parent) + " p" + SQL_INNER_JOIN +
                               sql_iso(children) + " c" + " ON " + SQL_TRUE)

                self.db.execute(sql_command)

                # THE CHILD COLUMNS COULD HAVE EXPANDED
                # ADD COLUMNS TO SELF
                for n, cs in nested_table.columns.items():
                    for c in cs:
                        column = Column(name=c.name,
                                        jx_type=c.jx_type,
                                        es_type=c.es_type,
                                        es_index=c.es_index,
                                        es_column=c.es_column,
                                        nested_path=[nested_column_name] +
                                        c.nested_path,
                                        last_updated=Date.now())
                        if c.name not in self.columns:
                            self.columns[column.name] = {column}
                        # NOTE(review): the comprehension below reuses the name `c` - confirm the intended comparison
                        elif c.jx_type not in [
                                c.jx_type for c in self.columns[c.name]
                        ]:
                            self.columns[column.name].add(column)

        # UPDATE THE PRIMITIVE (NON-NESTED, TOP-LEVEL) COLUMNS; `command` IS REBOUND TO THE SQL
        command = (
            SQL_UPDATE + quote_column(abs_schema.fact) + SQL_SET + sql_list([
                quote_column(c) + SQL_EQ +
                quote_value(get_if_type(v, c.jx_type))
                for k, v in command.set.items() if get_type(v) != "nested"
                for c in self.columns[k]
                if c.jx_type != "nested" and len(c.nested_path) == 1
            ] + [
                quote_column(c) + SQL_EQ + SQL_NULL
                for k in listwrap(command['clear']) if k in self.columns
                for c in self.columns[k]
                if c.jx_type != "nested" and len(c.nested_path) == 1
            ]) + SQL_WHERE + where_sql)

        self.db.execute(command)
Пример #23
0
    def _db_worker(self, please_stop):
        while not please_stop:
            try:
                with self._db_transaction():
                    result = self._query(
                        SQL_SELECT
                        + all_columns
                        + SQL_FROM
                        + db_table_name
                        + SQL_WHERE
                        + "last_updated > "
                        + quote_value(self.last_load)
                        + SQL_ORDERBY
                        + sql_list(map(quote_column, ["es_index", "name", "es_column"]))
                    )

                with self.locker:
                    for r in result.data:
                        c = row_to_column(result.header, r)
                        self._add(c)
                        if c.last_updated > self.last_load:
                            self.last_load = c.last_updated

                updates = self.todo.pop_all()
                DEBUG and updates and Log.note(
                    "{{num}} columns to push to db", num=len(updates)
                )
                for action, column in updates:
                    while not please_stop:
                        try:
                            with self._db_transaction():
                                DEBUG and Log.note(
                                    "{{action}} db for {{table}}.{{column}}",
                                    action=action,
                                    table=column.es_index,
                                    column=column.es_column,
                                )
                                if action is EXECUTE:
                                    self.db.execute(column)
                                elif action is UPDATE:
                                    self.db.execute(
                                        "UPDATE"
                                        + db_table_name
                                        + "SET"
                                        + sql_list(
                                            [
                                                "count=" + quote_value(column.count),
                                                "cardinality="
                                                + quote_value(column.cardinality),
                                                "multi=" + quote_value(column.multi),
                                                "partitions="
                                                + quote_value(
                                                    value2json(column.partitions)
                                                ),
                                                "last_updated="
                                                + quote_value(column.last_updated),
                                            ]
                                        )
                                        + SQL_WHERE
                                        + SQL_AND.join(
                                            [
                                                "es_index = "
                                                + quote_value(column.es_index),
                                                "es_column = "
                                                + quote_value(column.es_column),
                                                "last_updated < "
                                                + quote_value(column.last_updated),
                                            ]
                                        )
                                    )
                                elif action is DELETE:
                                    self.db.execute(
                                        "DELETE FROM"
                                        + db_table_name
                                        + SQL_WHERE
                                        + SQL_AND.join(
                                            [
                                                "es_index = "
                                                + quote_value(column.es_index),
                                                "es_column = "
                                                + quote_value(column.es_column),
                                            ]
                                        )
                                    )
                                else:
                                    self._db_insert_column(column)
                            break
                        except Exception as e:
                            e = Except.wrap(e)
                            if "database is locked" in e:
                                Log.note("metadata database is locked")
                                Till(seconds=1).wait()
                                break
                            else:
                                Log.warning("problem updataing database", cause=e)

            except Exception as e:
                Log.warning("problem updating database", cause=e)

            (Till(seconds=10) | please_stop).wait()
Пример #24
0
    def _set_op(self, query):
        """
        Run a non-aggregating (set operation) query and return its rows in
        the requested format.

        Steps, in order:
          1. collect the columns referenced by the select clauses
          2. emit one SQL select expression per sort key, per table UID/ORDER
             column, and per selected column
          3. run the combined, ordered, limited statement
          4. fold the flat result rows back into (possibly nested) documents
          5. package the documents as "cube", "table" or "list"

        :param query: the query; this method reads its `select`, `sort`,
                      `where`, `limit` and `format` properties and assigns
                      `pull` on every select clause
        :return: Data with `meta.format` of "cube", "table" or "list" (any
                 other requested format is answered as "list")
        """
        # GET LIST OF SELECTED COLUMNS
        vars_ = UNION([
            v.var for select in listwrap(query.select)
            for v in select.value.vars()
        ])
        schema = self.schema
        known_vars = schema.keys()

        active_columns = {".": set()}
        for v in vars_:
            for c in schema.leaves(v):
                nest = c.nested_path[0]
                active_columns.setdefault(nest, set()).add(c)

        # ANY VARS MENTIONED WITH NO COLUMNS?
        for v in vars_:
            if not any(startswith_field(cname, v) for cname in known_vars):
                active_columns["."].add(
                    Column(name=v,
                           jx_type=IS_NULL,
                           es_column=".",
                           es_index=".",
                           es_type='NULL',
                           nested_path=["."],
                           last_updated=Date.now()))

        # EVERY COLUMN, AND THE INDEX IT TAKES UP
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
        sql_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        nest_to_alias = {
            nested_path[0]: "__" + unichr(ord('a') + i) + "__"
            for i, nested_path in enumerate(self.snowflake.query_paths)
        }

        sorts = []
        if query.sort:
            for select in query.sort:
                col = SQLang[select.value].to_sql(schema)[0]
                for t, sql in col.sql.items():
                    json_type = sql_type_to_json_type[t]
                    if json_type in STRUCT:
                        continue
                    column_number = len(sql_selects)
                    # SQL HAS ABS TABLE REFERENCE
                    column_alias = _make_column_name(column_number)
                    sql_selects.append(sql_alias(sql, column_alias))
                    # THE `IS NULL` KEY IS ADDED AHEAD OF THE VALUE KEY, FOR
                    # BOTH SORT DIRECTIONS
                    sorts.append(quote_column(column_alias) + SQL_IS_NULL)
                    if select.sort == -1:
                        sorts.append(quote_column(column_alias) + " DESC")
                    else:
                        sorts.append(quote_column(column_alias))

        primary_doc_details = Data()
        # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
        # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
        nested_path = []
        for step, sub_table in self.snowflake.tables:
            nested_path.insert(0, step)
            nested_doc_details = {
                "sub_table": sub_table,
                "children": [],
                "index_to_column": {},
                "nested_path": nested_path
            }

            # INSERT INTO TREE
            if not primary_doc_details:
                primary_doc_details = nested_doc_details
            else:
                # NOTE(review): place() never returns True itself, and every
                # nested_doc_details shares the one `nested_path` list that is
                # mutated above, so it looks like a table can be attached to
                # more than one parent - confirm intent before changing
                def place(parent_doc_details):
                    if startswith_field(step,
                                        parent_doc_details['nested_path'][0]):
                        for c in parent_doc_details['children']:
                            if place(c):
                                return True
                        parent_doc_details['children'].append(
                            nested_doc_details)

                place(primary_doc_details)

            alias = nested_doc_details['alias'] = nest_to_alias[step]

            # WE ALWAYS ADD THE UID
            column_number = index_to_uid[step] = nested_doc_details[
                'id_coord'] = len(sql_selects)
            sql_select = quote_column(alias, UID)
            sql_selects.append(
                sql_alias(sql_select, _make_column_name(column_number)))
            if step != ".":
                # ID AND ORDER FOR CHILD TABLES
                index_to_column[column_number] = ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=nested_path,
                    column_alias=_make_column_name(column_number))
                column_number = len(sql_selects)
                sql_select = quote_column(alias, ORDER)
                sql_selects.append(
                    sql_alias(sql_select, _make_column_name(column_number)))
                index_to_column[column_number] = ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=nested_path,
                    column_alias=_make_column_name(column_number))

            # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
            if step not in active_columns:
                continue

            # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
            si = 0
            for select in listwrap(query.select):
                try:
                    column_number = len(sql_selects)
                    select.pull = get_column(column_number)
                    db_columns = SQLang[select.value].partial_eval().to_sql(
                        schema)

                    for column in db_columns:
                        for t, unsorted_sql in column.sql.items():
                            json_type = sql_type_to_json_type[t]
                            if json_type in STRUCT:
                                continue
                            column_number = len(sql_selects)
                            column_alias = _make_column_name(column_number)
                            sql_selects.append(
                                sql_alias(unsorted_sql, column_alias))
                            if startswith_field(schema.path, step) and is_op(
                                    select.value, LeavesOp):
                                # ONLY FLATTEN primary_nested_path AND PARENTS, NOT CHILDREN
                                leaf_name = get_property_name(
                                    concat_field(select.name, column.name))
                                mapping = ColumnMapping(
                                    push_name=literal_field(leaf_name),
                                    push_child=".",
                                    push_column_name=leaf_name,
                                    push_column=si,
                                    pull=get_column(column_number),
                                    sql=unsorted_sql,
                                    type=json_type,
                                    column_alias=column_alias,
                                    nested_path=nested_path)
                                # EVERY LEAF GETS ITS OWN OUTPUT COLUMN
                                si += 1
                            else:
                                mapping = ColumnMapping(
                                    push_name=select.name,
                                    push_child=column.name,
                                    push_column_name=select.name,
                                    push_column=si,
                                    pull=get_column(column_number),
                                    sql=unsorted_sql,
                                    type=json_type,
                                    column_alias=column_alias,
                                    nested_path=nested_path)
                            index_to_column[column_number] = mapping
                            nested_doc_details['index_to_column'][
                                column_number] = mapping
                finally:
                    # ADVANCE THE OUTPUT COLUMN, EVEN WHEN THE CLAUSE FAILED
                    si += 1

        where_clause = BooleanOp(query.where).partial_eval().to_sql(
            schema, boolean=True)[0].sql.b
        unsorted_sql = self._make_sql_for_one_nest_in_set_op(
            ".", sql_selects, where_clause, active_columns, index_to_column)

        # AFTER THE REQUESTED SORT, ORDER BY THE UID COLUMN OF EVERY TABLE
        for n, _ in self.snowflake.tables:
            sorts.append(quote_column(COLUMN + text(index_to_uid[n])))

        ordered_sql = ConcatSQL(
            (SQL_SELECT, SQL_STAR, SQL_FROM,
             sql_iso(unsorted_sql), SQL_ORDERBY, sql_list(sorts), SQL_LIMIT,
             quote_value(query.limit)))
        result = self.db.query(ordered_sql)

        # TRUE WHEN EACH DOCUMENT IS AN OBJECT OF NAMED PROPERTIES, FALSE WHEN
        # THE SINGLE SELECT CLAUSE IS EXPECTED TO GIVE ONE VALUE PER DOCUMENT
        multi_select = is_list(query.select) or is_op(query.select.value,
                                                      LeavesOp)

        def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id,
                               parent_id_coord):
            """
            :param rows: REVERSED STACK OF ROWS (WITH push() AND pop())
            :param row: CURRENT ROW BEING EXTRACTED
            :param nested_doc_details: {
                    "nested_path": wrap_nested_path(nested_path),
                    "index_to_column": map from column number to column details
                    "children": all possible direct descendants' nested_doc_details
                 }
            :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
            :param parent_id_coord: the column number for the parent id (so we can extract from each row)
            :return: the nested property (usually an array)
            """
            previous_doc_id = None
            doc = Null
            output = []
            id_coord = nested_doc_details['id_coord']

            while True:
                doc_id = row[id_coord]

                if doc_id == None or (parent_id_coord is not None and
                                      row[parent_id_coord] != parent_doc_id):
                    # UNDO PREVIOUS POP (RECORD IS NOT A NESTED RECORD OF parent_doc)
                    rows.append(row)
                    return output

                if doc_id != previous_doc_id:
                    previous_doc_id = doc_id
                    doc = Null
                    curr_nested_path = nested_doc_details['nested_path'][0]
                    column_mappings = nested_doc_details[
                        'index_to_column'].items()
                    for i, c in column_mappings:
                        value = row[i]
                        if multi_select:
                            # ASSIGN INNER PROPERTIES
                            column_field = concat_field(
                                c.push_name, c.push_child)
                        else:  # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                            column_field = c.push_child

                        if column_field == ".":
                            if exists(value):
                                doc = value
                        elif exists(value):
                            if doc is Null:
                                doc = Data()
                            doc[column_field] = value

                for child_details in nested_doc_details['children']:
                    # EACH NESTED TABLE MUST BE ASSEMBLED INTO A LIST OF OBJECTS
                    child_id = row[child_details['id_coord']]
                    if child_id is not None:
                        nested_value = _accumulate_nested(
                            rows, row, child_details, doc_id, id_coord)
                        if nested_value != None:
                            push_name = child_details['nested_path'][0]
                            if multi_select:
                                # ASSIGN INNER PROPERTIES
                                # THE RESULT IS KEPT IN `child_field` SO THE
                                # relative_field() HELPER IS NOT SHADOWED BY A
                                # LOCAL OF THE SAME NAME (THE HELPER IS
                                # ASSUMED TO BE IMPORTED AT FILE LEVEL, E.G.
                                # FROM mo_dots - CONFIRM)
                                child_field = relative_field(
                                    push_name, curr_nested_path)
                            else:  # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                                child_field = "."

                            if child_field == ".":
                                doc = unwraplist(nested_value)
                            else:
                                doc[child_field] = unwraplist(nested_value)

                output.append(doc)

                try:
                    row = rows.pop()
                except IndexError:
                    return output

        cols = tuple(
            [i for i in index_to_column.values() if i.push_name != None])
        rows = list(reversed(unwrap(result.data)))
        if rows:
            row = rows.pop()
            data = _accumulate_nested(rows, row, primary_doc_details, None,
                                      None)
        else:
            data = result.data

        if query.format == "cube":
            num_rows = len(data)
            # SINGLE EDGE: THE ROW NUMBER
            rownum_edges = [{
                "name": "rownum",
                "domain": {
                    "type": "rownum",
                    "min": 0,
                    "max": num_rows,
                    "interval": 1
                }
            }]

            if multi_select:
                # ONE LIST PER OUTPUT COLUMN NAME
                temp_data = {
                    c.push_column_name: [None] * num_rows
                    for c in cols
                }
                for rownum, d in enumerate(data):
                    for c in cols:
                        temp_data[c.push_column_name][rownum] = d[c.push_name]
                return Data(meta={"format": "cube"},
                            data=temp_data,
                            edges=rownum_edges)
            else:
                map_index_to_name = {
                    c.push_column: c.push_column_name
                    for c in cols
                }
                # THE DOCUMENTS THEMSELVES ARE THE ONLY COLUMN
                temp_data = [data]

                return Data(meta={"format": "cube"},
                            data={
                                n: temp_data[c]
                                for c, n in map_index_to_name.items()
                            },
                            edges=rownum_edges)

        elif query.format == "table":
            if multi_select:
                column_names = [None] * (max(c.push_column for c in cols) + 1)
                for c in cols:
                    column_names[c.push_column] = c.push_column_name

                temp_data = []
                for d in data:
                    row = [None] * len(column_names)
                    for c in cols:
                        row[c.push_column] = d[c.push_name]
                    temp_data.append(row)

                return Data(meta={"format": "table"},
                            header=column_names,
                            data=temp_data)
            else:
                column_names = listwrap(query.select).name
                return Data(meta={"format": "table"},
                            header=column_names,
                            data=[[d] for d in data])

        else:
            if multi_select:
                temp_data = []
                for d in data:
                    row = {}
                    for c in cols:
                        row[c.push_column_name] = d[c.push_name]
                    temp_data.append(row)
                return Data(meta={"format": "list"}, data=temp_data)
            else:
                return Data(meta={"format": "list"}, data=data)
Пример #25
0
                    pull=pull,
                    sql=sql,
                    type=sql_type_to_json_type[json_type],
                    column_alias=sql_name)

            vals = [v for t, v in edge_values]
            if query_edge.domain.type == "set":
                domain_name = quote_column("d" + text_type(edge_index) + "c" +
                                           text_type(column_index))
                domain_names = [domain_name]
                if len(edge_names) > 1:
                    Log.error("Do not know how to handle")
                if query_edge.value:
                    domain = SQL_UNION_ALL.join(
                        SQL_SELECT +
                        sql_alias(quote_value(coalesce(p.dataIndex, i)),
                                  quote_column("rownum")) + SQL_COMMA +
                        sql_alias(quote_value(p.value), domain_name)
                        for i, p in enumerate(query_edge.domain.partitions))
                    if query_edge.allowNulls:
                        domain += (SQL_UNION_ALL + SQL_SELECT + sql_alias(
                            quote_value(len(query_edge.domain.partitions)),
                            quote_column("rownum")) + SQL_COMMA +
                                   sql_alias(SQL_NULL, domain_name))
                    where = None
                    join_type = SQL_LEFT_JOIN if query_edge.allowNulls else SQL_INNER_JOIN
                    on_clause = (SQL_OR.join(
                        join_column(edge_alias, k) + " = " + v
                        for k, v in zip(domain_names, vals)) + SQL_OR +
                                 sql_iso(
                                     join_column(edge_alias, domain_name) +
Пример #26
0
                    pull=pull,
                    sql=sql,
                    type=sql_type_to_json_type[sql_type],
                    column_alias=sql_name
                )

            vals = [v for t, v in edge_values]
            if query_edge.domain.type == "set":
                domain_name = "d" + text(edge_index) + "c" + text(column_index)
                domain_names = [domain_name]
                if len(edge_names) > 1:
                    Log.error("Do not know how to handle")
                if query_edge.value:
                    domain = SQL_UNION_ALL.join(
                        SQL_SELECT +
                        sql_alias(quote_value(coalesce(p.dataIndex, i)), quote_column("rownum")) + SQL_COMMA +
                        sql_alias(quote_value(p.value), domain_name)
                        for i, p in enumerate(query_edge.domain.partitions)
                    )
                    if query_edge.allowNulls:
                        domain += (
                            SQL_UNION_ALL + SQL_SELECT +
                            sql_alias(quote_value(len(query_edge.domain.partitions)), quote_column("rownum")) + SQL_COMMA +
                            sql_alias(SQL_NULL, domain_name)
                        )
                    where = None
                    join_type = SQL_LEFT_JOIN if query_edge.allowNulls else SQL_INNER_JOIN
                    on_clause = (
                        SQL_OR.join(
                            quote_column(edge_alias, k) + " = " + v
                            for k, v in zip(domain_names, vals)
Пример #27
0
    def get_tuids(self, branch, revision, files):
        """
        GET TUIDS FOR THE GIVEN FILES: FIRST FROM THE LOCAL tuid TABLE, THEN
        ASK THE ENDPOINT FOR WHATEVER IS MISSING AND STORE THE ANSWER IN DB
        :param branch: BRANCH TO FIND THE REVISION/FILE
        :param revision: THE REVISION NUMBER (ONLY THE FIRST 12 CHARACTERS ARE USED)
        :param files: THE FULL PATHS TO THE FILES (LEADING "/" IS STRIPPED)
        :return: MAP FROM FILENAME TO TUID LIST; WHEN THE REQUEST FAILS, ONLY
                 WHAT WAS FOUND SO FAR
        """

        # SCRUB INPUTS
        revision = revision[:12]
        files = [path.lstrip('/') for path in files]

        timer = Timer(
            "ask tuid service for {{num}} files at {{revision|left(12)}}",
            {"num": len(files), "revision": revision},
            silent=not self.enabled
        )
        with timer:
            # WHAT DO WE ALREADY HAVE LOCALLY?
            cached = self.db.query(
                "SELECT file, tuids FROM tuid WHERE revision=" + quote_value(revision) +
                " AND file IN " + quote_list(files)
            )
            found = {}
            for filename, tuids in cached.data:
                found[filename] = json2value(tuids)

            try:
                remaining = set(files) - set(found.keys())
                if not remaining:
                    # NOTHING TO ASK THE SERVICE FOR
                    return found

                request = wrap({
                    "from": "files",
                    "where": {"and": [
                        {"eq": {"revision": revision}},
                        {"in": {"path": remaining}},
                        {"eq": {"branch": branch}}
                    ]},
                    "branch": branch,
                    "meta": {
                        "format": "list",
                        "request_time": Date.now()
                    }
                })

                # RECORD THE REQUEST ON THE QUEUE, IF THERE IS ONE
                if self.push_queue is None:
                    DEBUG and Log.note("no recorded tuid request")
                else:
                    DEBUG and Log.note(
                        "record tuid request to SQS: {{timestamp}}",
                        timestamp=request.meta.request_time
                    )
                    self.push_queue.add(request)

                if not self.enabled:
                    return found

                new_response = http.post_json(
                    self.endpoint,
                    json=request,
                    timeout=self.timeout
                )

                with self.db.transaction() as transaction:
                    new_rows = sql_list(
                        quote_list((revision, r.path, value2json(r.tuids)))
                        for r in new_response.data
                        if r.tuids != None
                    )
                    command = "INSERT INTO tuid (revision, file, tuids) VALUES " + new_rows
                    # SKIP THE INSERT WHEN THERE ARE NO ROWS TO ADD
                    if not command.endswith(" VALUES "):
                        transaction.execute(command)
                self.num_bad_requests = 0

                if new_response:
                    found.update({r.path: r.tuids for r in new_response.data})
                return found

            except Exception as e:
                self.num_bad_requests += 1
                Till(seconds=SLEEP_ON_ERROR).wait()
                # THIRD FAILURE WHILE STILL ENABLED: DISABLE AND REPORT
                if self.enabled and self.num_bad_requests >= 3:
                    self.enabled = False
                    Log.error("TUID service has problems.", cause=e)
                return found
Пример #28
0
def test_very_distant_files(service):
    """
    Check tuid stability for one file across two hard-coded revisions.

    The annotations of `new_rev` are deleted and `latestFileMod` is pointed
    back at `old_rev` for the test file, then tuids are requested for both
    revisions (presumably many changesets apart, given the test name and
    max_csets_proc=10000 -- confirm against the repository history).

    Two properties are verified:
    * lines listed in `lines_moved` (old line -> new line) keep their tuid
    * lines listed in `lines_added` carry tuids that do not appear anywhere
      in the old revision of the file

    :param service: fixture exposing `conn.transaction()` and
                    `get_tuids_from_files()`
    """
    new_rev = "6e8e861540e6"
    old_rev = "1e2c9151a09e"
    test_files = ["docshell/base/nsDocShell.cpp"]

    # Force the service to recompute the annotations of new_rev starting
    # from old_rev
    with service.conn.transaction() as t:
        t.execute("DELETE FROM annotations WHERE revision = " +
                  quote_value(new_rev))
        for file in test_files:
            t.execute("UPDATE latestFileMod SET revision = " +
                      quote_value(old_rev) + " WHERE file = " +
                      quote_value(file))

    old_tuids, _ = service.get_tuids_from_files(test_files,
                                                old_rev,
                                                use_thread=False,
                                                max_csets_proc=10000)
    new_tuids, _ = service.get_tuids_from_files(test_files,
                                                new_rev,
                                                use_thread=False,
                                                max_csets_proc=10000)

    # file -> {line number in old_rev: line number in new_rev}
    lines_moved = {"docshell/base/nsDocShell.cpp": {1028: 1026, 1097: 1029}}
    # file -> line numbers that only exist in new_rev
    lines_added = {"docshell/base/nsDocShell.cpp": [2770]}

    Log.note("Check output manually for any abnormalities as well.")

    completed = 0
    for file, old_file_tuids in old_tuids:
        if file in lines_moved:
            moved = lines_moved[file]

            old_moved_tuids = {}
            print("OLD:")
            for tuid_map in old_file_tuids:
                print(str(tuid_map.line) + ":" + str(tuid_map.tuid))
                if tuid_map.line in moved:
                    old_moved_tuids[tuid_map.line] = tuid_map.tuid

            # Every moved line must have been found in the old revision
            assert len(old_moved_tuids) == len(moved)

            print("\n\nNEW:")
            new_moved_tuids = {}
            for new_file, tmp_tuids in new_tuids:
                if new_file == file:
                    destination_lines = list(moved.values())
                    for tuid_map in tmp_tuids:
                        print(str(tuid_map.line) + ":" + str(tuid_map.tuid))
                        if tuid_map.line in destination_lines:
                            new_moved_tuids[tuid_map.line] = tuid_map.tuid
                    break

            assert len(new_moved_tuids) == len(old_moved_tuids)
            for line_moved, old_tuid in old_moved_tuids.items():
                new_line = moved[line_moved]
                assert new_line in new_moved_tuids
                assert old_tuid == new_moved_tuids[new_line]
            completed += 1

        if file in lines_added:
            added = lines_added[file]
            old_tuid_values = set(t.tuid for t in old_file_tuids)
            for new_file, tmp_tuids in new_tuids:
                if new_file != file:
                    continue

                # Only the tuids of the added lines are compared.  The
                # previous version overwrote this list with the raw
                # tuid-map objects, so the assertion below was no longer
                # checking the added lines' tuids.
                new_file_tuids = [
                    tuid_map.tuid
                    for tuid_map in tmp_tuids
                    if tuid_map.line in added
                ]

                # No tuids from the new should be in the old
                # so this intersection should always be empty.
                assert not (set(new_file_tuids) & old_tuid_values)
                completed += 1
                break

    assert completed == len(lines_moved) + len(lines_added)
Пример #29
0
    def _set_op(self, query, frum):
        """
        BUILD ONE SQL STATEMENT FOR query, RUN IT, AND SHAPE THE ROWS ACCORDING TO query.format

        STEPS, IN THE ORDER THEY APPEAR BELOW:
        1. COLLECT THE "ACTIVE" COLUMNS, GROUPED BY NESTED PATH
        2. BUILD THE LIST OF SQL SELECT CLAUSES: SORT COLUMNS FIRST, THEN (PER TABLE)
           THE UID/ORDER COLUMNS AND THE COLUMNS REQUIRED BY EACH jx SELECT CLAUSE.
           THE POSITION IN sql_selects IS THE COLUMN NUMBER USED EVERYWHERE ELSE
        3. WRAP THE SQL FROM _make_sql_for_one_nest_in_set_op() WITH ORDER BY AND LIMIT
        4. FOLD THE FLAT ROWS BACK INTO NESTED DOCUMENTS (_accumulate_nested)
        5. RETURN "cube", "table", OR (ANYTHING ELSE) "list" FORMAT

        :param query: THE QUERY; select, sort, where, limit AND format ARE READ.
                      SIDE EFFECTS: EACH SELECT CLAUSE GETS A NEW pull ATTRIBUTE, AND
                      query.select IS REPLACED (WITHOUT THE "_id" CLAUSE) WHEN THE GUID
                      COLUMN IS AMONG THE VARIABLES
        :param frum: PATH OF THE TABLE QUERIED; ALL BUT THE FIRST STEP OF THE PATH IS
                     USED AS THE PRIMARY NESTED PATH
        :return: Data WITH meta.format AND data (PLUS edges FOR cube, header FOR table)
        """
        # GET LIST OF COLUMNS
        frum_path = split_field(frum)
        primary_nested_path = join_field(frum_path[1:])
        vars_ = UNION([s.value.vars() for s in listwrap(query.select)])
        schema = self.sf.tables[primary_nested_path].schema

        # NOTE(review): THIS MAPPING IS NOT READ BEFORE IT IS REBUILT, WITH THE SAME
        # EXPRESSION, FURTHER DOWN
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path, sub_table) in enumerate(self.sf.tables.items())
        }

        # MAP FROM NESTED PATH TO THE LIST OF COLUMNS NEEDED FROM THAT TABLE
        active_columns = {".": []}
        # COLUMNS OF THE PRIMARY SCHEMA THAT startswith_field() MATCHES TO A SELECTED VARIABLE
        for cname, cols in schema.items():
            if any(startswith_field(cname, v) for v in vars_):
                for c in cols:
                    if c.type in STRUCT:
                        continue
                    nest = c.nested_path[0]
                    active = active_columns.get(nest)
                    if not active:
                        active = active_columns[nest] = []
                    active.append(c)

        # COLUMNS FROM EVERY TABLE, UNLESS startswith_field() MATCHES cname AGAINST THE
        # NAME OF A COLUMN THAT IS ALREADY ACTIVE
        for nested_path, s in self.sf.tables.items():
            for cname, cols in s.schema.items():
                if not any(startswith_field(cname, c.names[c.nested_path[0]]) for n, cc in active_columns.items() for c in cc):
                    for c in cols:
                        if c.type in STRUCT:
                            continue
                        nest = c.nested_path[0]
                        active = active_columns.get(nest)
                        if not active:
                            active = active_columns[nest] = []
                        active.append(c)

        # ANY VARS MENTIONED WITH NO COLUMNS?
        # THEY GET A PLACEHOLDER COLUMN OF TYPE "null" ON THE ROOT PATH
        for v in vars_:
            if not any(startswith_field(cname, v) for cname in schema.keys()):
                active_columns["."].append(Column(
                    names={".": v},
                    type="null",
                    es_column=".",
                    es_index=".",
                    nested_path=["."]
                ))

        # EVERY COLUMN, AND THE INDEX IT TAKES UP
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
        sql_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        # ONE TABLE ALIAS PER NESTED PATH: "__a__", "__b__", ... IN self.sf.tables ORDER
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path, sub_table) in enumerate(self.sf.tables.items())
            }

        # EACH SORT TERM GETS ITS OWN SELECT COLUMN, AND TWO SORT KEYS: A NULL TEST
        # FOLLOWED BY THE VALUE (PRESUMABLY TO CONTROL WHERE NULLS SORT - CONFIRM)
        sorts = []
        if query.sort:
            for s in query.sort:
                col = s.value.to_sql(schema)[0]
                for t, sql in col.sql.items():
                    json_type = sql_type_to_json_type[t]
                    if json_type in STRUCT:
                        continue
                    column_number = len(sql_selects)
                    # SQL HAS ABS TABLE REFERENCE
                    column_alias = _make_column_name(column_number)
                    sql_selects.append(sql + " AS " + column_alias)
                    if s.sort == -1:
                        sorts.append(column_alias + " IS NOT NULL")
                        sorts.append(column_alias + " DESC")
                    else:
                        sorts.append(column_alias + " IS NULL")
                        sorts.append(column_alias)

        # SQL FRAGMENTS ALREADY SELECTED (USED BELOW TO SKIP DUPLICATES)
        selects = []
        # ROOT OF THE TREE OF nested_doc_details; SET ON THE FIRST LOOP ITERATION
        primary_doc_details = Data()
        # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
        # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
        for nested_path, sub_table in self.sf.tables.items():
            nested_doc_details = {
                "sub_table": sub_table,
                "children": [],
                "index_to_column": {},
                "nested_path": [nested_path]  # fake the real nested path, we only look at [0] anyway
            }

            # INSERT INTO TREE
            if not primary_doc_details:
                primary_doc_details = nested_doc_details
            else:
                # NOTE(review): place() HAS NO `return True` OF ITS OWN, SO IT ALWAYS
                # RETURNS None: `if place(c)` IS NEVER TAKEN, AND nested_doc_details IS
                # APPENDED TO EVERY NODE ON THE WAY DOWN THAT startswith_field() MATCHES.
                # CONFIRM THIS IS INTENDED
                def place(parent_doc_details):
                    if startswith_field(nested_path, parent_doc_details['nested_path'][0]):
                        for c in parent_doc_details['children']:
                            if place(c):
                                return True
                        parent_doc_details['children'].append(nested_doc_details)

                place(primary_doc_details)

            alias = nested_doc_details['alias'] = nest_to_alias[nested_path]

            # THE GUID IS EXPOSED AS "_id", AND THE "_id" SELECT CLAUSE IS DROPPED FROM query.select
            # (id_coord AND index_to_uid SET HERE ARE OVERWRITTEN BY THE UID COLUMN JUST BELOW)
            if nested_path=="." and quoted_GUID in vars_:
                column_number = index_to_uid[nested_path] = nested_doc_details['id_coord'] = len(sql_selects)
                sql_select = alias + "." + quoted_GUID
                sql_selects.append(sql_select + " AS " + _make_column_name(column_number))
                index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                    push_name="_id",
                    push_column_name="_id",
                    push_column=0,
                    push_child=".",
                    sql=sql_select,
                    pull=get_column(column_number),
                    type="string",
                    column_alias=_make_column_name(column_number),
                    nested_path=[nested_path]           # fake the real nested path, we only look at [0] anyway
                )
                query.select = [s for s in listwrap(query.select) if s.name!="_id"]


            # WE ALWAYS ADD THE UID AND ORDER
            column_number = index_to_uid[nested_path] = nested_doc_details['id_coord'] = len(sql_selects)
            sql_select = alias + "." + quoted_UID
            sql_selects.append(sql_select + " AS " + _make_column_name(column_number))
            # FOR NON-ROOT TABLES THE UID IS REGISTERED IN index_to_column, AND AN ORDER COLUMN IS ADDED
            if nested_path !=".":
                index_to_column[column_number]=ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=[nested_path],            # fake the real nested path, we only look at [0] anyway
                    column_alias=_make_column_name(column_number)

                )
                column_number = len(sql_selects)
                sql_select = alias + "." + quote_table(ORDER)
                sql_selects.append(sql_select + " AS " + _make_column_name(column_number))
                index_to_column[column_number]=ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=[nested_path],            # fake the real nested path, we only look at [0] anyway
                    column_alias=_make_column_name(column_number)

                )

            # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
            if nested_path not in active_columns:
                continue

            if len(active_columns[nested_path]) != 0:
                # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
                # si IS THE OUTPUT (PUSH) COLUMN INDEX
                si = 0
                for s in listwrap(query.select):
                    try:
                        column_number = len(sql_selects)
                        s.pull = get_column(column_number)
                        db_columns = s.value.to_sql(schema)

                        if isinstance(s.value, LeavesOp):
                            # EACH LEAF GETS ITS OWN OUTPUT COLUMN, NAMED FROM s.name AND column.name
                            for column in db_columns:
                                if isinstance(column.nested_path, list):
                                    column.nested_path=column.nested_path[0]
                                # ONLY COLUMNS BELONGING TO THE TABLE CURRENTLY BEING PROCESSED
                                if column.nested_path and column.nested_path!=nested_path:
                                    continue
                                for t, unsorted_sql in column.sql.items():
                                    json_type = sql_type_to_json_type[t]
                                    if json_type in STRUCT:
                                        continue
                                    column_number = len(sql_selects)
                                    # SQL HAS ABS TABLE REFERENCE
                                    column_alias = _make_column_name(column_number)
                                    # SKIP SINGLE-TOKEN SQL THAT HAS ALREADY BEEN SELECTED
                                    if concat_field(alias, unsorted_sql) in selects and len(unsorted_sql.split())==1:
                                        continue
                                    selects.append(concat_field(alias, unsorted_sql))
                                    sql_selects.append(alias + "." + unsorted_sql + " AS " + column_alias)
                                    index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                                        push_name=literal_field(get_property_name(concat_field(s.name, column.name))),
                                        push_column_name=get_property_name(concat_field(s.name, column.name)),
                                        push_column=si,
                                        push_child=".",
                                        pull=get_column(column_number),
                                        sql=unsorted_sql,
                                        type=json_type,
                                        column_alias=column_alias,
                                        nested_path=[nested_path]           # fake the real nested path, we only look at [0] anyway
                                    )
                                    # NOTE(review): si IS INCREMENTED ONCE MORE BY THE finally BELOW
                                    si += 1
                        else:
                            # ALL SQL COLUMNS OF THIS SELECT CLAUSE SHARE ONE OUTPUT COLUMN (si),
                            # DISTINGUISHED BY push_child
                            for column in db_columns:
                                if isinstance(column.nested_path, list):
                                    column.nested_path=column.nested_path[0]
                                if column.nested_path and column.nested_path!=nested_path:
                                    continue
                                for t, unsorted_sql in column.sql.items():
                                    json_type = sql_type_to_json_type[t]
                                    if json_type in STRUCT:
                                        continue
                                    column_number = len(sql_selects)
                                    # SQL HAS ABS TABLE REFERENCE
                                    column_alias = _make_column_name(column_number)
                                    if concat_field(alias, unsorted_sql) in selects and len(unsorted_sql.split())==1:
                                        continue
                                    selects.append(concat_field(alias, unsorted_sql))
                                    sql_selects.append(alias + "." + unsorted_sql + " AS " + column_alias)
                                    index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                                        push_name=s.name,
                                        push_column_name=s.name,
                                        push_column=si,
                                        push_child=column.name,
                                        pull=get_column(column_number),
                                        sql=unsorted_sql,
                                        type=json_type,
                                        column_alias=column_alias,
                                        nested_path=[nested_path]
                                        # fake the real nested path, we only look at [0] anyway
                                    )
                    finally:
                        # ALWAYS ADVANCE TO THE NEXT OUTPUT COLUMN; ANY EXCEPTION STILL PROPAGATES
                        si += 1
            elif startswith_field(nested_path, primary_nested_path):
                # ADD REQUIRED COLUMNS, FOR DEEP STUFF
                # NOTE(review): THIS BRANCH IS ONLY REACHED WHEN active_columns[nested_path]
                # HAS LENGTH 0, SO THE LOOP BELOW HAS NOTHING TO ITERATE OVER.  IT ALSO
                # READS s AND si, WHICH ARE ONLY SET BY EARLIER LOOPS, AND REBINDS nested_path
                for ci, c in enumerate(active_columns[nested_path]):
                    if c.type in STRUCT:
                        continue

                    column_number = len(sql_selects)
                    nested_path = c.nested_path
                    unsorted_sql = nest_to_alias[nested_path[0]] + "." + quote_table(c.es_column)
                    column_alias = _make_column_name(column_number)
                    if concat_field(alias, unsorted_sql) in selects and len(unsorted_sql.split())==1:
                        continue
                    selects.append(concat_field(alias, unsorted_sql))
                    sql_selects.append(alias + "." + unsorted_sql + " AS " + column_alias)
                    index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                        push_name=s.name,
                        push_column_name=s.name,
                        push_column=si,
                        push_child=relative_field(c.names["."], s.name),
                        pull=get_column(column_number),
                        sql=unsorted_sql,
                        type=c.type,
                        column_alias=column_alias,
                        nested_path=nested_path
                    )

        # THE BOOLEAN (b) SQL OF THE WHERE CLAUSE
        where_clause = query.where.to_sql(schema, boolean=True)[0].sql.b
        unsorted_sql = self._make_sql_for_one_nest_in_set_op(
            ".",
            sql_selects,
            where_clause,
            active_columns,
            index_to_column
        )

        # AFTER THE REQUESTED SORT, ORDER BY THE UID COLUMN OF EVERY TABLE
        for n, _ in self.sf.tables.items():
            sorts.append(COLUMN + text_type(index_to_uid[n]))

        ordered_sql = (
            "SELECT * FROM (\n" +
            unsorted_sql +
            "\n)" +
            "\nORDER BY\n" + ",\n".join(sorts) +
            "\nLIMIT " + quote_value(query.limit)
        )
        self.db.create_new_functions()  #creating new functions: regexp
        result = self.db.query(ordered_sql)

        def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id, parent_id_coord):
            """
            :param rows: REVERSED STACK OF ROWS (WITH push() AND pop())
            :param row: CURRENT ROW BEING EXTRACTED
            :param nested_doc_details: {
                    "nested_path": wrap_nested_path(nested_path),
                    "index_to_column": map from column number to column details
                    "children": all possible direct descendants' nested_doc_details
                 }
            :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
            :param parent_id_coord: the column number for the parent id (so we can extract from each row)
            :return: the nested property (usually an array)
            """
            previous_doc_id = None
            doc = Null
            output = []
            id_coord = nested_doc_details['id_coord']

            while True:
                doc_id = row[id_coord]

                if doc_id == None or (parent_id_coord is not None and row[parent_id_coord] != parent_doc_id):
                    rows.append(row)  # UNDO PREVIOUS POP (RECORD IS NOT A NESTED RECORD OF parent_doc)
                    return output

                # A NEW DOC ID STARTS A NEW DOCUMENT, FILLED FROM THIS TABLE'S COLUMNS
                if doc_id != previous_doc_id:
                    previous_doc_id = doc_id
                    doc = Null
                    curr_nested_path = nested_doc_details['nested_path'][0]
                    index_to_column = nested_doc_details['index_to_column'].items()
                    if index_to_column:
                        for i, c in index_to_column:
                            value = row[i]
                            # NULLS AND EMPTY STRINGS ARE NOT WRITTEN TO THE DOCUMENT
                            if value == None:
                                continue
                            if value == '':
                                continue

                            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                                # ASSIGN INNER PROPERTIES
                                relative_path=join_field([c.push_name]+split_field(c.push_child))
                            else:           # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                                relative_path=c.push_child

                            if relative_path == ".":
                                doc = value
                            elif doc is Null:
                                doc = Data()
                                doc[relative_path] = value
                            else:
                                doc[relative_path] = value

                for child_details in nested_doc_details['children']:
                    # EACH NESTED TABLE MUST BE ASSEMBLED INTO A LIST OF OBJECTS
                    child_id = row[child_details['id_coord']]
                    if child_id is not None:
                        # THE RECURSIVE CALL POPS FROM rows UNTIL A ROW NO LONGER BELONGS TO THIS DOC
                        nested_value = _accumulate_nested(rows, row, child_details, doc_id, id_coord)
                        if nested_value:
                            push_name = child_details['nested_path'][0]
                            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                                # ASSIGN INNER PROPERTIES
                                relative_path=relative_field(push_name, curr_nested_path)
                            else:           # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                                relative_path="."

                            if relative_path == "." and doc is Null:
                                doc = nested_value
                            elif relative_path == ".":
                                doc[push_name] = unwraplist([v[push_name] for v in nested_value])
                            elif doc is Null:
                                doc = Data()
                                doc[relative_path] = unwraplist(nested_value)
                            else:
                                doc[relative_path] = unwraplist(nested_value)

                output.append(doc)

                try:
                    row = rows.pop()
                except IndexError:
                    # NO MORE ROWS
                    return output

        # ONLY THE COLUMN MAPPINGS THAT HAVE A push_name (THE UID/ORDER MAPPINGS ABOVE ARE BUILT WITHOUT ONE)
        cols = tuple([i for i in index_to_column.values() if i.push_name != None])
        # REVERSED, SO pop() RETURNS THE ROWS IN RESULT ORDER
        rows = list(reversed(unwrap(result.data)))
        if rows:
            row = rows.pop()
            data = _accumulate_nested(rows, row, primary_doc_details, None, None)
        else:
            data = result.data

        if query.format == "cube":
            # RETURNS FROM INSIDE THE LOOP, ON THE FIRST TABLE NAME THAT SATISFIES THE TEST.
            # THIS PATH IS BUILT FROM THE FLAT result.data, NOT THE NESTED data
            for f, _ in self.sf.tables.items():
                if frum.endswith(f) or (test_dots(cols) and isinstance(query.select, list)):
                    num_rows = len(result.data)
                    num_cols = MAX([c.push_column for c in cols]) + 1 if len(cols) else 0
                    map_index_to_name = {c.push_column: c.push_column_name for c in cols}
                    # ONE LIST PER OUTPUT COLUMN, ONE SLOT PER ROW
                    temp_data = [[None]*num_rows for _ in range(num_cols)]
                    for rownum, d in enumerate(result.data):
                        for c in cols:
                            if c.push_child == ".":
                                temp_data[c.push_column][rownum] = c.pull(d)
                            else:
                                column = temp_data[c.push_column][rownum]
                                if column is None:
                                    column = temp_data[c.push_column][rownum] = Data()
                                column[c.push_child] = c.pull(d)
                    output = Data(
                        meta={"format": "cube"},
                        data={n: temp_data[c] for c, n in map_index_to_name.items()},
                        edges=[{
                            "name": "rownum",
                            "domain": {
                                "type": "rownum",
                                "min": 0,
                                "max": num_rows,
                                "interval": 1
                            }
                        }]
                    )
                    return output

            # NO TABLE MATCHED: BUILD THE CUBE FROM THE NESTED DOCUMENTS IN data
            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                num_rows = len(data)
                map_index_to_name = {c.push_column: c.push_column_name for c in cols}
                temp_data = Data()
                for rownum, d in enumerate(data):
                    for k, v in d.items():
                        if temp_data[k] == None:
                            temp_data[k] = [None] * num_rows
                        temp_data[k][rownum] = v
                return Data(
                    meta={"format": "cube"},
                    data={n: temp_data[literal_field(n)] for c, n in map_index_to_name.items()},
                    edges=[{
                        "name": "rownum",
                        "domain": {
                            "type": "rownum",
                            "min": 0,
                            "max": num_rows,
                            "interval": 1
                        }
                    }]
                )
            else:
                num_rows = len(data)
                map_index_to_name = {c.push_column: c.push_column_name for c in cols}
                # temp_data HAS A SINGLE ENTRY, SO ONLY push_column 0 CAN BE LOOKED UP BELOW
                temp_data = [data]

                return Data(
                    meta={"format": "cube"},
                    data={n: temp_data[c] for c, n in map_index_to_name.items()},
                    edges=[{
                        "name": "rownum",
                        "domain": {
                            "type": "rownum",
                            "min": 0,
                            "max": num_rows,
                            "interval": 1
                        }
                    }]
                )

        elif query.format == "table":
            # RETURNS FROM INSIDE THE LOOP, ON THE FIRST TABLE NAME THAT frum ENDS WITH.
            # THIS PATH IS BUILT FROM THE FLAT result.data
            for f, _ in self.sf.tables.items():
                if  frum.endswith(f):
                    num_column = MAX([c.push_column for c in cols])+1
                    header = [None]*num_column
                    for c in cols:
                        header[c.push_column] = c.push_column_name

                    output_data = []
                    for d in result.data:
                        row = [None] * num_column
                        for c in cols:
                            set_column(row, c.push_column, c.push_child, c.pull(d))
                        output_data.append(row)

                    return Data(
                        meta={"format": "table"},
                        header=header,
                        data=output_data
                    )
            # NO TABLE MATCHED: BUILD THE TABLE FROM THE NESTED DOCUMENTS IN data
            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                num_rows = len(data)
                column_names= [None]*(max(c.push_column for c in cols) + 1)
                for c in cols:
                    column_names[c.push_column] = c.push_column_name

                temp_data = []
                for rownum, d in enumerate(data):
                    row =[None] * len(column_names)
                    # EACH DOCUMENT PROPERTY GOES TO THE OUTPUT COLUMN WHOSE push_name EQUALS THE KEY
                    for i, (k, v) in enumerate(sorted(d.items())):
                        for c in cols:
                            if k==c.push_name:
                                row[c.push_column] = v
                    temp_data.append(row)

                return Data(
                    meta={"format": "table"},
                    header=column_names,
                    data=temp_data
                )
            else:
                # SINGLE VALUE SELECT: ONE COLUMN, ONE VALUE PER ROW
                column_names = listwrap(query.select).name
                return Data(
                    meta={"format": "table"},
                    header=column_names,
                    data=[[d] for d in data]
                )

        else:
            # ANY OTHER FORMAT IS RETURNED AS "list"
            # RETURNS FROM INSIDE THE LOOP, ON THE FIRST TABLE NAME THAT SATISFIES THE TEST.
            # THIS PATH IS BUILT FROM THE FLAT result.data
            for f, _ in self.sf.tables.items():
                if frum.endswith(f) or (test_dots(cols) and isinstance(query.select, list)):
                    data = []
                    for d in result.data:
                        row = Data()
                        for c in cols:
                            if c.push_child == ".":
                                row[c.push_name] = c.pull(d)
                            elif c.num_push_columns:
                                # VALUE IS ONE SLOT OF A FIXED-LENGTH LIST STORED UNDER push_name
                                tuple_value = row[c.push_name]
                                if not tuple_value:
                                    tuple_value = row[c.push_name] = [None] * c.num_push_columns
                                tuple_value[c.push_child] = c.pull(d)
                            elif not isinstance(query.select, list):   # select is value type
                                row[c.push_child]=c.pull(d)
                            else:
                                row[c.push_name][c.push_child] = c.pull(d)

                        data.append(row)

                    return Data(
                        meta={"format": "list"},
                        data=data
                    )

            # NO TABLE MATCHED: BUILD THE LIST FROM THE NESTED DOCUMENTS IN data
            if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                temp_data=[]
                for rownum, d in enumerate(data):
                    row = {}
                    # BOTH BRANCHES STORE v UNDER push_column_name WHENEVER push_name EQUALS THE KEY
                    for k, v in d.items():
                        for c in cols:
                            if c.push_name==c.push_column_name==k:
                                    row[c.push_column_name] = v
                            elif c.push_name==k and c.push_column_name!=k:
                                    row[c.push_column_name] = v
                    temp_data.append(row)
                return Data(
                    meta={"format": "list"},
                    data=temp_data
                )
            else:
                return Data(
                    meta={"format": "list"},
                    data=data
                )
Пример #30
0
    def update(self, command):
        """
        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        """
        command = wrap(command)

        # REJECT DEEP UPDATES
        touched_columns = command.set.keys() | set(listwrap(command['clear']))
        for c in self.get_leaves():
            if c.name in touched_columns and c.nested_path and len(
                    c.name) > len(c.nested_path[0]):
                Log.error("Deep update not supported")

        # ADD NEW COLUMNS
        where = jx_expression(command.where)
        _vars = where.vars()
        _map = {
            v: c.es_column
            for v in _vars for c in self.columns.get(v, Null)
            if c.type not in STRUCT
        }
        where_sql = where.map(_map).to_sql()
        new_columns = set(command.set.keys()) - set(self.columns.keys())
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_type(nested_value)
            column = Column(names={".": new_column_name},
                            type=ctype,
                            es_index=self.sf.fact,
                            es_column=typed_column(new_column_name, ctype))
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_type(nested_value) == "nested":
                nested_table_name = concat_field(self.sf.fact,
                                                 nested_column_name)
                nested_table = nested_tables[nested_column_name]
                self_primary_key = ",".join(
                    quote_table(c.es_column) for u in self.uid
                    for c in self.columns[u])
                extra_key_name = UID_PREFIX + "id" + text_type(len(self.uid))
                extra_key = [e
                             for e in nested_table.columns[extra_key_name]][0]

                sql_command = "DELETE FROM " + quote_table(nested_table.name) + \
                              "\nWHERE EXISTS (" + \
                              "\nSELECT 1 " + \
                              "\nFROM " + quote_table(nested_table.name) + " n" + \
                              "\nJOIN (" + \
                              "\nSELECT " + self_primary_key + \
                              "\nFROM " + quote_table(self.sf.fact) + \
                              "\nWHERE " + where_sql + \
                              "\n) t ON " + \
                              " AND ".join(
                                  "t." + quote_table(c.es_column) + " = n." + quote_table(c.es_column)
                                  for u in self.uid
                                  for c in self.columns[u]
                              ) + \
                              ")"
                self.db.execute(sql_command)

                # INSERT NEW RECORDS
                if not nested_value:
                    continue

                doc_collection = {}
                for d in listwrap(nested_value):
                    nested_table.flatten(d,
                                         Data(),
                                         doc_collection,
                                         path=nested_column_name)

                prefix = "INSERT INTO " + quote_table(nested_table.name) + \
                         "(" + \
                         self_primary_key + "," + \
                         quote_column(extra_key) + "," + \
                         ",".join(
                             quote_table(c.es_column)
                             for c in doc_collection.get(".", Null).active_columns
                         ) + ")"

                # BUILD THE PARENT TABLES
                parent = "\nSELECT " + \
                         self_primary_key + \
                         "\nFROM " + quote_table(self.sf.fact) + \
                         "\nWHERE " + jx_expression(command.where).to_sql()

                # BUILD THE RECORDS
                children = " UNION ALL ".join(
                    "\nSELECT " + quote_value(i) + " " +
                    quote_table(extra_key.es_column) + "," + ",".join(
                        quote_value(row[c.name]) + " " +
                        quote_table(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns)
                    for i, row in enumerate(
                        doc_collection.get(".", Null).rows))

                sql_command = prefix + \
                              "\nSELECT " + \
                              ",".join(
                                  "p." + quote_table(c.es_column)
                                  for u in self.uid for c in self.columns[u]
                              ) + "," + \
                              "c." + quote_column(extra_key) + "," + \
                              ",".join(
                                  "c." + quote_table(c.es_column)
                                  for c in doc_collection.get(".", Null).active_columns
                              ) + \
                              "\nFROM (" + parent + ") p " + \
                              "\nJOIN (" + children + \
                              "\n) c on 1=1"

                self.db.execute(sql_command)

                # THE CHILD COLUMNS COULD HAVE EXPANDED
                # ADD COLUMNS TO SELF
                for n, cs in nested_table.columns.items():
                    for c in cs:
                        column = Column(names={".": c.name},
                                        type=c.type,
                                        es_index=c.es_index,
                                        es_column=c.es_column,
                                        nested_path=[nested_column_name] +
                                        c.nested_path)
                        if c.name not in self.columns:
                            self.columns[column.name] = {column}
                        elif c.type not in [
                                c.type for c in self.columns[c.name]
                        ]:
                            self.columns[column.name].add(column)

        command = (
            "UPDATE " + quote_table(self.sf.fact) + " SET " + ",\n".join([
                quote_column(c) + "=" + quote_value(get_if_type(v, c.type))
                for k, v in command.set.items() if get_type(v) != "nested"
                for c in self.columns[k]
                if c.type != "nested" and len(c.nested_path) == 1
            ] + [
                quote_column(c) + "=NULL"
                for k in listwrap(command['clear']) if k in self.columns
                for c in self.columns[k]
                if c.type != "nested" and len(c.nested_path) == 1
            ]) + " WHERE " + where_sql)

        self.db.execute(command)
Пример #31
0
 def execute(self, sql, params=None):
     """
     Fill the `?` placeholders of `sql` with quoted `params`, then run it.

     Placeholders are matched to parameters positionally, left to right.
     The template is split once, up front, so a `?` character that is part
     of an already substituted value is never mistaken for a placeholder
     (repeated `str.replace(..., 1)` on the growing string had that flaw).

     :param sql: statement text; every `?` in it is treated as a placeholder
     :param params: optional iterable of values, passed through `quote_value`
     :return: whatever `self.transaction.execute` returns
     """
     if params:
         quoted = [quote_value(p) for p in params]
         fragments = sql.split('?')
         rebuilt = [fragments[0]]
         for position, fragment in enumerate(fragments[1:]):
             # Placeholders beyond the supplied params stay as a literal `?`;
             # params beyond the available placeholders are ignored.
             rebuilt.append(quoted[position] if position < len(quoted) else '?')
             rebuilt.append(fragment)
         sql = "".join(rebuilt)
     return self.transaction.execute(sql)
Пример #32
0
    def create_and_insert_tuids(self, revision):
        """
        Make sure every line in `self.lines` carries a tuid for `revision`.

        Lines without a tuid (or flagged as new) are first looked up in the
        `temporal` table; the ones still missing get a fresh tuid which is
        inserted into that table. On any failure `self.failed_file` is set.

        :param revision: revision the lines belong to
        :return: None
        """
        self.replace_line_with_tuidline()

        # ONE (file, revision, line) TUPLE PER LINE, IN THE ORDER OF self.lines
        origins = [
            (entry.filename, revision, entry.line) for entry in self.lines
        ]
        # LINE NUMBERS THAT STILL NEED A TUID
        pending = [
            entry.line
            for entry in self.lines
            if not entry.tuid or entry.is_new_line
        ]

        with self.tuid_service.conn.transaction() as t:
            # LOOK FOR TUIDS ALREADY RECORDED FOR THE PENDING LINES
            known = {}
            if pending:
                try:
                    lookup_sql = (
                        "SELECT tuid, file, revision, line FROM temporal"
                        " WHERE file = " + quote_value(self.filename) +
                        " AND revision = " + quote_value(revision) +
                        " AND line IN " + quote_set(pending)
                    )
                    known = {
                        line_no: old_tuid
                        for old_tuid, _file, _rev, line_no in t.query(lookup_sql).data
                    }
                except Exception as e:
                    # Log takes out important output, use print instead
                    self.failed_file = True
                    print("Trying to find new lines: " + str(pending))
                    Log.error("Error encountered:", cause=e)

            # CREATE AND STORE TUIDS FOR THE LINES THAT HAVE NONE
            created = []
            to_create = set(pending) - set(known)
            if to_create:
                try:
                    # origins IS INDEXED FROM ZERO, LINE NUMBERS FROM ONE
                    created = [
                        (self.tuid_service.tuid(),) + origins[line_no - 1]
                        for line_no in to_create
                    ]
                    insert_into_db_chunked(
                        t,
                        created,
                        "INSERT INTO temporal (tuid, file, revision, line) VALUES "
                    )
                except Exception as e:
                    Log.note(
                        "Failed to insert new tuids (likely due to merge conflict) on {{file}}: {{cause}}",
                        file=self.filename,
                        cause=e
                    )
                    self.failed_file = True
                    return

            # ONE LOOKUP TABLE; ALREADY-RECORDED TUIDS WIN OVER NEW ONES
            resolved = {line_no: new_tuid for new_tuid, _, _, line_no in created}
            resolved.update(known)

            for entry in self.lines:
                if entry.line in resolved:
                    entry.tuid = resolved[entry.line]

                if entry.tuid:
                    continue

                Log.warning(
                    "Cannot find TUID at {{file}} and {{rev}}for: {{line}}",
                    file=self.filename,
                    rev=revision,
                    line=str(entry)
                )
                self.failed_file = True
                return
Пример #33
0
 def get(self, sql, params=None):
     """
     Fill the `?` placeholders of `sql` with quoted `params`, run the query
     and return its rows.

     Placeholders are matched to parameters positionally, left to right.
     The template is split once, up front, so a `?` character that is part
     of an already substituted value is never mistaken for a placeholder
     (repeated `str.replace(..., 1)` on the growing string had that flaw).

     :param sql: query text; every `?` in it is treated as a placeholder
     :param params: optional iterable of values, passed through `quote_value`
     :return: the `data` attribute of the `self.transaction.query` result
     """
     if params:
         quoted = [quote_value(p) for p in params]
         fragments = sql.split('?')
         rebuilt = [fragments[0]]
         for position, fragment in enumerate(fragments[1:]):
             # Placeholders beyond the supplied params stay as a literal `?`;
             # params beyond the available placeholders are ignored.
             rebuilt.append(quoted[position] if position < len(quoted) else '?')
             rebuilt.append(fragment)
         sql = "".join(rebuilt)
     return self.transaction.query(sql).data
Пример #34
0
 def to_sql(self, schema, not_null=False, boolean=False):
     """
     Render this expression's value as a single-column SQL description.

     `schema`, `not_null` and `boolean` are accepted but not used here.

     :return: wrapped one-element list; the element is named "." and holds
              the quoted value under the "n" key of its "sql" mapping
     """
     quoted = quote_value(self.value)
     column = {"name": ".", "sql": {"n": quoted}}
     return wrap([column])
Пример #35
0
    def get_tuids(self, branch, revision, files):
        """
        Return the TUIDs of `files` at `revision`, answering from the local
        `tuid` table where possible and asking the endpoint for the rest.
        Newly fetched TUIDs are stored in the local table.

        :param branch: branch to find the revision/file
        :param revision: the revision number (only the first 12 characters are used)
        :param files: the full paths to the files (leading "/" is stripped)
        :return: map from filename to tuid list
        """

        # NORMALIZE INPUTS
        revision = revision[:12]
        files = [path.lstrip('/') for path in files]

        timer_params = {"num": len(files), "revision": revision}
        with Timer(
                "ask tuid service for {{num}} files at {{revision|left(12)}}",
                timer_params,
                silent=not self.enabled):
            # WHAT THE LOCAL TABLE ALREADY KNOWS
            cached = self.db.query(
                "SELECT file, tuids FROM tuid WHERE revision=" +
                quote_value(revision) + " AND file IN " + quote_list(files))
            found = {}
            for path, encoded in cached.data:
                found[path] = json2value(encoded)

            try:
                missing = set(files) - set(found.keys())
                if not missing:
                    # EVERYTHING WAS ANSWERED LOCALLY
                    return found

                revision_filter = {"eq": {"revision": revision}}
                path_filter = {"in": {"path": missing}}
                branch_filter = {"eq": {"branch": branch}}
                request = wrap({
                    "from": "files",
                    "where": {
                        "and": [revision_filter, path_filter, branch_filter]
                    },
                    "branch": branch,
                    "meta": {
                        "format": "list",
                        "request_time": Date.now()
                    }
                })

                # RECORD THE REQUEST ON THE QUEUE, IF THERE IS ONE
                if self.push_queue is None:
                    if DEBUG:
                        Log.note("no recorded tuid request")
                else:
                    if DEBUG:
                        Log.note(
                            "record tuid request to SQS: {{timestamp}}",
                            timestamp=request.meta.request_time)
                    self.push_queue.add(request)

                # A DISABLED CLIENT ONLY ANSWERS FROM THE LOCAL TABLE
                if not self.enabled:
                    return found

                fetched = http.post_json(self.endpoint,
                                         json=request,
                                         timeout=self.timeout)

                with self.db.transaction() as transaction:
                    insert_prefix = "INSERT INTO tuid (revision, file, tuids) VALUES "
                    # `!= None` IS KEPT AS-IS; ROWS WITHOUT TUIDS ARE NOT STORED
                    command = insert_prefix + sql_list(
                        quote_list((revision, r.path, value2json(r.tuids)))
                        for r in fetched.data if r.tuids != None)
                    # NOTHING WAS APPENDED WHEN THERE ARE NO ROWS TO STORE
                    if not command.endswith(" VALUES "):
                        transaction.execute(command)
                self.num_bad_requests = 0

                if fetched:
                    found.update({r.path: r.tuids for r in fetched.data})
                return found

            except Exception as e:
                # BACK OFF; ON THE THIRD FAILURE OF AN ENABLED CLIENT,
                # DISABLE IT AND REPORT THE ERROR
                self.num_bad_requests += 1
                Till(seconds=SLEEP_ON_ERROR).wait()
                if self.enabled and self.num_bad_requests >= 3:
                    self.enabled = False
                    Log.error("TUID service has problems.", cause=e)
                return found
Пример #36
0
    def update(self, command):
        """
        Apply an update command to the columns held in `self.data`.

        The command is wrapped and its `where.eq` clause selects the columns
        to touch. For each selected column, every property listed in
        `command.clear` is set to None and every pair in `command.set` is
        assigned; clearing "." removes the column itself instead. Each change
        is also added to `self.todo` as an (action, payload) tuple.

        :param command: update command with `where`, `set` and `clear` parts
        :return: None
        Any exception raised while processing is passed to `Log.error`.
        """
        # MARK THE CONTAINER AS CHANGED BEFORE ANYTHING ELSE
        self.dirty = True
        try:
            command = wrap(command)
            DEBUG and Log.note(
                "Update {{timestamp}}: {{command|json}}",
                command=command,
                timestamp=Date(command["set"].last_updated),
            )
            eq = command.where.eq
            if eq.es_index:
                if len(eq) == 1:
                    # ONLY es_index IS CONSTRAINED
                    if unwraplist(command.clear) == ".":
                        # CLEARING "." FOR A WHOLE INDEX: DROP ITS ENTRY FROM
                        # self.data AND QUEUE A DELETE STATEMENT FOR THE TABLE
                        with self.locker:
                            del self.data[eq.es_index]
                        self.todo.add(
                            (
                                EXECUTE,
                                "DELETE FROM "
                                + db_table_name
                                + SQL_WHERE
                                + " es_index="
                                + quote_value(eq.es_index),
                            )
                        )
                        return

                    # FASTEST: EVERY COLUMN OF THE INDEX
                    # NOTE: THE LOOKUP HAPPENS BEFORE THE LOCK IS TAKEN;
                    # ONLY THE FLATTENING RUNS UNDER THE LOCK
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [c for cs in all_columns for c in cs]
                elif eq.es_column and len(eq) == 2:
                    # FASTER: es_index AND es_column ARE THE ONLY CONSTRAINTS
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [
                            c
                            for cs in all_columns
                            for c in cs
                            if c.es_column == eq.es_column
                        ]

                else:
                    # SLOWER: COMPARE EVERY PROPERTY NAMED IN THE eq CLAUSE
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [
                            c
                            for cs in all_columns
                            for c in cs
                            if all(
                                c[k] == v for k, v in eq.items()
                            )  # THIS LINE IS VERY SLOW
                        ]
            else:
                # NO es_index CONSTRAINT: FILTER ALL COLUMNS WITH THE WHERE CLAUSE
                columns = list(self)
                columns = jx.filter(columns, command.where)

            with self.locker:
                for col in columns:
                    DEBUG and Log.note(
                        "update column {{table}}.{{column}}",
                        table=col.es_index,
                        column=col.es_column,
                    )
                    for k in command["clear"]:
                        if k == ".":
                            # REMOVE THE COLUMN ITSELF, THEN PRUNE THE
                            # CONTAINERS THAT BECAME EMPTY
                            self.todo.add((DELETE, col))
                            lst = self.data[col.es_index]
                            cols = lst[col.name]
                            cols.remove(col)
                            if len(cols) == 0:
                                del lst[col.name]
                                if len(lst) == 0:
                                    del self.data[col.es_index]
                            # break SKIPS THE else BELOW: A DELETED COLUMN
                            # GETS NO FURTHER PROPERTY CHANGES
                            break
                        else:
                            col[k] = None
                    else:
                        # DID NOT DELETE COLUMN ("."), CONTINUE TO SET PROPERTIES
                        for k, v in command.set.items():
                            col[k] = v
                        self.todo.add((UPDATE, col))

        except Exception as e:
            Log.error("should not happen", cause=e)