示例#1
0
    def _index_columns(self, columns):
        """
        Return one lookup index for each entry of ``columns``.

        An index is reused from ``self._index`` when one is already stored,
        looked up first by ``s.value`` and then by the sorted-key JSON of
        ``s.value.to_dict()``.  Otherwise a new index is built, stored in
        ``self._index`` under that JSON key, and returned.

        A newly built index maps each value produced by ``jx.get(s.value)``
        to the set of ``self._unique_index`` values whose source row
        produced it.

        :param columns: sequence of objects having a ``value`` attribute
        :return: list of indexes, in the same order as ``columns``
        """
        # INDEX ALL COLUMNS, ESPECIALLY THOSE FUNCTION RESULTS
        result = []
        for column in columns:
            # FIRST CHOICE: AN INDEX STORED DIRECTLY UNDER THE VALUE
            existing = self._index.get(column.value, None)
            if existing is not None:
                result.append(existing)
                continue

            # SECOND CHOICE: AN INDEX STORED UNDER THE JSON FORM OF THE VALUE
            cache_key = convert.value2json(column.value.to_dict(),
                                           sort_keys=True)
            cached = self._index.get(cache_key, None)
            if cached is not None:
                result.append(cached)
                continue

            # NOTHING STORED YET: BUILD THE INDEX AND REMEMBER IT
            fresh = {}
            result.append(fresh)
            self._index[cache_key] = fresh
            getter = jx.get(column.value)
            for _, row_id in self._unique_index.items():
                key = getter(self._source[row_id])
                fresh.setdefault(key, set()).add(row_id)
        return result
示例#2
0
    def window(self, window):
        """
        Add a new column to this cube, computed one cell at a time.

        For every coordinate of the cube a row is assembled from the existing
        columns and from the edge partition at that coordinate.
        ``window.value`` is evaluated against that row and the result is
        stored in a new matrix under ``window.name``.

        :param window: clause with ``name`` and ``value``; a clause with
                       ``edges`` or ``sort`` is reported as "not implemented"
        :return: self
        """
        if window.edges or window.sort:
            Log.error("not implemented")

        from pyLibrary.queries import jx

        # SET OP
        # list() SO THE FIRST MATRIX CAN BE TAKEN EVEN WHEN values() IS A VIEW
        # (PYTHON 3) RATHER THAN A LIST
        canonical = list(self.data.values())[0]
        accessor = jx.get(window.value)
        # SNAPSHOT THE NAMES NOW: THE NEW COLUMN IS ADDED TO self.data JUST
        # BELOW, AND A LIVE keys() VIEW WOULD INCLUDE IT WHILE BUILDING ROWS
        cnames = list(self.data.keys())

        # ANNOTATE EXISTING CUBE WITH NEW COLUMN
        m = self.data[window.name] = Matrix(dims=canonical.dims)
        for coord in canonical._all_combos():
            row = Dict(
            )  # IT IS SAD WE MUST HAVE A Dict(), THERE ARE {"script": expression} USING THE DOT NOTATION
            for k in cnames:
                row[k] = self.data[k][coord]
            for c, e in zip(coord, self.edges):
                row[e.name] = e.domain.partitions[c]
            m[coord] = accessor(
                row, Null,
                Null)  # DUMMY Null VALUES BECAUSE I DO NOT KNOW WHAT TO DO

        self.select.append(window)
        return self
示例#3
0
 def __init__(self, rollover_field, rollover_interval, rollover_max,
              queue_size=10000, batch_size=5000, kwargs=None):
     """
     Set up the rollover state and the cluster connection.

     NOTE: the interval and max are read from ``kwargs``, not from the
     positional arguments, and ``kwargs`` is dereferenced directly
     (presumably a decorator outside this view fills it in - confirm).

     :param rollover_field: the FIELD with a timestamp to use for determining which index to push to
     :param rollover_interval: duration between roll-over to new index
     :param rollover_max: remove old indexes, do not add old records
     :param queue_size: number of documents to queue in memory
     :param batch_size: number of documents to push at once
     :param kwargs: plus additional ES settings
     :return:
     """
     self.settings = kwargs
     self.locker = Lock("lock for rollover_index")
     self.rollover_field = jx.get(rollover_field)

     # NORMALIZE BOTH DURATIONS, KEEPING THE SETTINGS IN STEP WITH self
     interval = Duration(kwargs.rollover_interval)
     self.rollover_interval = interval
     self.settings.rollover_interval = interval

     oldest = Duration(kwargs.rollover_max)
     self.rollover_max = oldest
     self.settings.rollover_max = oldest

     self.known_queues = {}  # MAP DATE TO INDEX
     self.cluster = elasticsearch.Cluster(self.settings)
示例#4
0
 def search(self, query):
     """
     Return the stored documents that pass the query's filter.

     The filter is taken from ``query.query.filtered.filter``.  The result
     is a wrapped ``{"hits": {"total": ..., "hits": [...]}}`` structure.
     When ``query.fields`` is set, each hit carries ``_id`` and the selected
     ``fields``; otherwise each hit carries ``_id`` and ``_source``.

     :param query: the query, wrapped before use
     :return: wrapped hits structure
     """
     query = wrap(query)
     matches = jx.get(query.query.filtered.filter)
     filtered = wrap([
         {"_id": doc_id, "_source": doc}
         for doc_id, doc in self.data.items()
         if matches(doc)
     ])

     if not query.fields:
         return wrap({"hits": {"total": len(filtered), "hits": filtered}})

     # ONLY THE REQUESTED FIELDS ARE RETURNED FOR EACH HIT
     hits = []
     for hit in filtered:
         selected = jx.select([unwrap(hit._source)], query.fields)[0]
         hits.append({"_id": hit._id, "fields": unwrap(selected)})
     return wrap({"hits": {"total": len(filtered), "hits": hits}})
    def __init__(self, name, db=None, uid=UID_PREFIX+"id", exists=False):
        """
        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE (A NEW Sqlite() IS MADE WHEN NOT GIVEN)
        :param uid: THE UNIQUE INDEX FOR THIS TABLE; ONE COLUMN NAME, OR A LIST OF THEM
        :param exists: True TO LOAD THE COLUMNS OF AN EXISTING TABLE, False TO CREATE THE TABLE
        :return: HANDLE FOR TABLE IN db
        """
        Container.__init__(self, frum=None)
        if db:
            self.db = db
        else:
            self.db = db = Sqlite()
        self.name = name
        self.uid = listwrap(uid)

        self.columns = {}
        # ITERATE THE LIST FORM: A BARE STRING uid WOULD OTHERWISE BE WALKED
        # ONE CHARACTER AT A TIME, AND THE PRIMARY KEY LOOKUP BELOW WOULD FAIL
        for u in self.uid:
            # ALWAYS BIND cs FOR THIS u, EVEN WHEN u HAS BEEN SEEN BEFORE
            cs = self.columns.setdefault(u, set())
            column_type = "integer" if u.startswith(UID_PREFIX) else "text"
            cs.add(Column(name=u, table=name, type=column_type, es_column=typed_column(u, column_type), es_index=name))

        self.uid_accessor = jx.get(self.uid)
        self.nested_tables = {}  # MAP FROM TABLE NAME TO Table OBJECT
        if exists:
            # LOAD THE COLUMNS
            command = "PRAGMA table_info(" + quote_table(name) + ")"
            details = self.db.query(command)
            self.columns = {}
            for r in details:
                # r[1] AND r[2] ARE USED AS THE COLUMN NAME AND DECLARED TYPE
                cname = untyped_column(r[1])
                ctype = r[2].lower()
                column = Column(
                    name=cname,
                    table=name,
                    type=ctype,
                    es_column=typed_column(cname, ctype),
                    es_index=name
                )

                # NOTE(review): KEYED BY THE TABLE NAME HERE, WHILE THE LOOP
                # ABOVE KEYS BY COLUMN NAME - CONFIRM cname WAS NOT INTENDED
                cs = self.columns.get(name, Null)
                if not cs:
                    cs = self.columns[name] = set()
                cs.add(column)
        else:
            command = "CREATE TABLE " + quote_table(name) + "(" + \
                      (",".join(_quote_column(c) + " " + c.type for u, cs in self.columns.items() for c in cs)) + \
                      ", PRIMARY KEY (" + \
                      (", ".join(_quote_column(c) for u in self.uid for c in self.columns[u])) + \
                      "))"

            self.db.execute(command)
示例#6
0
 def __init__(self, rollover_field, rollover_interval, rollover_max, queue_size=10000, batch_size=5000, settings=None):
     """
     Set up the rollover state and the cluster connection.

     NOTE: the interval and max are read from ``settings``, not from the
     positional arguments, and ``settings`` is dereferenced directly
     (presumably a decorator outside this view fills it in - confirm).

     :param rollover_field: the FIELD with a timestamp to use for determining which index to push to
     :param rollover_interval: duration between roll-over to new index
     :param rollover_max: remove old indexes, do not add old records
     :param queue_size: number of documents to queue in memory
     :param batch_size: number of documents to push at once
     :param settings: plus additional ES settings
     :return:
     """
     self.settings = settings
     self.rollover_field = jx.get(rollover_field)

     # NORMALIZE BOTH DURATIONS, KEEPING THE SETTINGS IN STEP WITH self
     interval = Duration(settings.rollover_interval)
     self.rollover_interval = interval
     self.settings.rollover_interval = interval

     oldest = Duration(settings.rollover_max)
     self.rollover_max = oldest
     self.settings.rollover_max = oldest

     self.known_queues = {}  # MAP DATE TO INDEX
     self.cluster = elasticsearch.Cluster(self.settings)
    def update(self, command):
        """
        EXPECTING command == {"set":term, "clear":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE clear CLAUSE NAMES THE PROPERTIES TO SET TO None
        THE where CLAUSE IS A JSON EXPRESSION FILTER

        EVERY ITEM OF self.data THAT PASSES THE FILTER IS CHANGED IN PLACE:
        FIRST THE clear, THEN THE set
        """
        command = wrap(command)
        names_to_clear = listwrap(command["clear"])
        assignments = command.set.items()
        matches = jx.get(command.where)

        for row in self.data:
            if not matches(row):
                continue
            for name in names_to_clear:
                row[name] = None
            for name, value in assignments:
                row[name] = value
示例#8
0
    def update(self, command):
        """
        EXPECTING command == {"set":term, "clear":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE clear CLAUSE NAMES THE PROPERTIES TO SET TO None
        THE where CLAUSE IS A JSON EXPRESSION FILTER

        ITEMS OF self.data THAT DO NOT PASS THE FILTER ARE LEFT UNTOUCHED
        """
        command = wrap(command)
        cleared = listwrap(command["clear"])
        pairs = command.set.items()
        selector = jx.get(command.where)

        for doc in self.data:
            if not selector(doc):
                continue
            # CLEAR BEFORE SET, SO A NAME IN BOTH CLAUSES ENDS UP SET
            for key in cleared:
                doc[key] = None
            for key, new_value in pairs:
                doc[key] = new_value
示例#9
0
    def window(self, window):
        """
        Add a new column to this cube, computed one cell at a time.

        For every coordinate of the cube a row is assembled from the existing
        columns and from the edge partition at that coordinate.
        ``window.value`` is evaluated against that row and the result is
        stored in a new matrix under ``window.name``.

        :param window: clause with ``name`` and ``value``; a clause with
                       ``edges`` or ``sort`` is reported as "not implemented"
        :return: self
        """
        if window.edges or window.sort:
            Log.error("not implemented")

        from pyLibrary.queries import jx

        # SET OP
        # list() SO THE FIRST MATRIX CAN BE TAKEN EVEN WHEN values() IS A VIEW
        # (PYTHON 3) RATHER THAN A LIST
        canonical = list(self.data.values())[0]
        accessor = jx.get(window.value)
        # SNAPSHOT THE NAMES NOW: THE NEW COLUMN IS ADDED TO self.data JUST
        # BELOW, AND A LIVE keys() VIEW WOULD INCLUDE IT WHILE BUILDING ROWS
        cnames = list(self.data.keys())

        # ANNOTATE EXISTING CUBE WITH NEW COLUMN
        m = self.data[window.name] = Matrix(dims=canonical.dims)
        for coord in canonical._all_combos():
            row = Data()  # IT IS SAD WE MUST HAVE A Data(), THERE ARE {"script": expression} USING THE DOT NOTATION
            for k in cnames:
                row[k] = self.data[k][coord]
            for c, e in zip(coord, self.edges):
                row[e.name] = e.domain.partitions[c]
            m[coord] = accessor(row, Null, Null)  # DUMMY Null VALUES BECAUSE I DO NOT KNOW WHAT TO DO

        self.select.append(window)
        return self
示例#10
0
 def search(self, query):
     """
     Return the stored documents that pass the query's filter.

     The filter is taken from ``query.query.filtered.filter``.  The result
     is a wrapped ``{"hits": {"total": ..., "hits": [...]}}`` structure.
     When ``query.fields`` is set, each hit carries ``_id`` and the selected
     ``fields``; otherwise each hit carries ``_id`` and ``_source``.

     :param query: the query, wrapped before use
     :return: wrapped hits structure
     """
     query = wrap(query)
     keep = jx.get(query.query.filtered.filter)
     filtered = wrap([
         {"_id": key, "_source": document}
         for key, document in self.data.items()
         if keep(document)
     ])

     if not query.fields:
         return wrap({"hits": {"total": len(filtered), "hits": filtered}})

     # ONLY THE REQUESTED FIELDS ARE RETURNED FOR EACH HIT
     projected = []
     for found in filtered:
         fields = jx.select([unwrap(found._source)], query.fields)[0]
         projected.append({"_id": found._id, "fields": unwrap(fields)})
     return wrap({"hits": {"total": len(filtered), "hits": projected}})
示例#11
0
    def _index_values(self, columns):
        """
        Return one lookup index for each entry of ``columns``.

        An index is reused from ``self._index`` when one is already stored,
        looked up first by ``s.value`` and then by the sorted-key JSON of
        ``s.value.to_dict()``.  Otherwise a new index is built, stored in
        ``self._index`` under that JSON key, and returned.

        A newly built index maps each value produced by ``jx.get(s.value)``
        to the set of ``self._unique_index`` values whose source row
        produced it.

        :param columns: sequence of objects having a ``value`` attribute
        :return: list of indexes, in the same order as ``columns``
        """
        # INDEX ALL COLUMNS, ESPECIALLY THOSE FUNCTION RESULTS
        indexes = []
        for column in columns:
            # FIRST CHOICE: AN INDEX STORED DIRECTLY UNDER THE VALUE
            direct = self._index.get(column.value, None)
            if direct is not None:
                indexes.append(direct)
                continue

            # SECOND CHOICE: AN INDEX STORED UNDER THE JSON FORM OF THE VALUE
            json_key = convert.value2json(column.value.to_dict(), sort_keys=True)
            by_json = self._index.get(json_key, None)
            if by_json is not None:
                indexes.append(by_json)
                continue

            # NOTHING STORED YET: BUILD THE INDEX AND REMEMBER IT
            built = {}
            indexes.append(built)
            self._index[json_key] = built
            evaluate = jx.get(column.value)
            for _, position in self._unique_index.items():
                value = evaluate(self._source[position])
                built.setdefault(value, set()).add(position)
        return indexes
示例#12
0
    def __init__(self, name, db=None, uid=GUID, exists=False, kwargs=None):
        """
        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE (A NEW Sqlite() IS MADE WHEN NOT GIVEN)
        :param uid: THE UNIQUE INDEX FOR THIS TABLE
        :param exists: True TO LOAD THE COLUMNS OF AN EXISTING TABLE, False TO CREATE THE TABLE
        :param kwargs: NOT READ IN THIS METHOD
        :return: HANDLE FOR TABLE IN db
        """
        global _config
        Container.__init__(self, frum=None)
        if not db:
            db = Sqlite()
        self.db = db

        # FIRST TABLE MADE ALSO PROVIDES THE DEFAULT CONTAINER CONFIG, IF NONE IS SET
        if not _config:
            from pyLibrary.queries.containers import config as _config
            if not _config.default:
                _config.default = {"type": "sqlite", "settings": {"db": db}}

        self.name = name
        self.uid = listwrap(uid)
        self._next_uid = 1
        self._make_digits_table()

        self.uid_accessor = jx.get(self.uid)
        # MAP FROM NESTED PATH TO Table OBJECT, PARENTS PRECEDE CHILDREN
        self.nested_tables = OrderedDict()
        self.nested_tables["."] = self
        # MAP FROM DOCUMENT ABS PROPERTY NAME TO THE SET OF SQL COLUMNS IT REPRESENTS (ONE FOR EACH REALIZED DATATYPE)
        self.columns = Index(keys=[join_field(["names", self.name])])

        if exists:
            # LOAD THE COLUMNS
            details = self.db.query("PRAGMA table_info(" + quote_table(name) + ")")

            for r in details:
                # r[1] AND r[2] ARE USED AS THE COLUMN NAME AND DECLARED TYPE
                cname = untyped_column(r[1])
                ctype = r[2].lower()
                loaded = Column(
                    names={name: cname},
                    type=ctype,
                    nested_path=['.'],
                    es_column=typed_column(cname, ctype),
                    es_index=name
                )
                self.add_column_to_schema(self.columns, loaded)
        else:
            # EVERY uid OTHER THAN THE GUID GETS ITS OWN STRING COLUMN
            for u in self.uid:
                if u == GUID:
                    continue
                uid_column = Column(
                    names={name: u},
                    type="string",
                    es_column=typed_column(u, "string"),
                    es_index=name
                )
                self.add_column_to_schema(self.nested_tables, uid_column)

            header = "CREATE TABLE " + quote_table(name) + "("

            # THE INTEGER UID COLUMN COMES FIRST, THEN ALL KNOWN COLUMNS
            definitions = [quoted_UID + " INTEGER"]
            for _, cs in self.columns.items():
                for c in cs:
                    definitions.append(_quote_column(c) + " " + sql_types[c.type])

            # PRIMARY KEY IS THE UID COLUMN PLUS THE COLUMNS OF EACH uid
            key_parts = [quoted_UID]
            for u in self.uid:
                for c in self.columns[u]:
                    key_parts.append(_quote_column(c))

            command = (header + ",".join(definitions) +
                       ", PRIMARY KEY (" + ", ".join(key_parts) + "))")

            self.db.execute(command)
示例#13
0
    def __init__(self, name, db=None, uid=UID_PREFIX + "id", exists=False):
        """
        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE (A NEW Sqlite() IS MADE WHEN NOT GIVEN)
        :param uid: THE UNIQUE INDEX FOR THIS TABLE; ONE COLUMN NAME, OR A LIST OF THEM
        :param exists: True TO LOAD THE COLUMNS OF AN EXISTING TABLE, False TO CREATE THE TABLE
        :return: HANDLE FOR TABLE IN db
        """
        Container.__init__(self, frum=None)
        if db:
            self.db = db
        else:
            self.db = db = Sqlite()
        self.name = name
        self.uid = listwrap(uid)

        self.columns = {}
        # ITERATE THE LIST FORM: A BARE STRING uid WOULD OTHERWISE BE WALKED
        # ONE CHARACTER AT A TIME, AND THE PRIMARY KEY LOOKUP BELOW WOULD FAIL
        for u in self.uid:
            # ALWAYS BIND cs FOR THIS u, EVEN WHEN u HAS BEEN SEEN BEFORE
            cs = self.columns.setdefault(u, set())
            column_type = "integer" if u.startswith(UID_PREFIX) else "text"
            cs.add(
                Column(name=u,
                       table=name,
                       type=column_type,
                       es_column=typed_column(u, column_type),
                       es_index=name))

        self.uid_accessor = jx.get(self.uid)
        self.nested_tables = {}  # MAP FROM TABLE NAME TO Table OBJECT
        if exists:
            # LOAD THE COLUMNS
            command = "PRAGMA table_info(" + quote_table(name) + ")"
            details = self.db.query(command)
            self.columns = {}
            for r in details:
                # r[1] AND r[2] ARE USED AS THE COLUMN NAME AND DECLARED TYPE
                cname = untyped_column(r[1])
                ctype = r[2].lower()
                column = Column(name=cname,
                                table=name,
                                type=ctype,
                                es_column=typed_column(cname, ctype),
                                es_index=name)

                # NOTE(review): KEYED BY THE TABLE NAME HERE, WHILE THE LOOP
                # ABOVE KEYS BY COLUMN NAME - CONFIRM cname WAS NOT INTENDED
                cs = self.columns.get(name, Null)
                if not cs:
                    cs = self.columns[name] = set()
                cs.add(column)
        else:
            command = "CREATE TABLE " + quote_table(name) + "(" + \
                      (",".join(_quote_column(c) + " " + c.type for u, cs in self.columns.items() for c in cs)) + \
                      ", PRIMARY KEY (" + \
                      (", ".join(_quote_column(c) for u in self.uid for c in self.columns[u])) + \
                      "))"

            self.db.execute(command)