def window(self, window):
    """
    ANNOTATE THIS CUBE WITH A NEW COLUMN COMPUTED BY window.value
    (SET-OP ONLY: window.edges AND window.sort ARE NOT SUPPORTED)

    :param window: clause with .name (new column), .value (expression)
    :return: self, FOR CHAINING
    """
    if window.edges or window.sort:
        raise NotImplementedError()
    from jx_python import jx

    # SET OP
    # FIX: dict views are not subscriptable in Python 3; materialize first
    canonical = list(self.data.values())[0]
    accessor = jx.get(window.value)
    cnames = list(self.data.keys())

    # ANNOTATE EXISTING CUBE WITH NEW COLUMN
    m = self.data[window.name] = Matrix(dims=canonical.dims)
    for coord in canonical._all_combos():
        row = Data()  # IT IS SAD WE MUST HAVE A Data(), THERE ARE {"script": expression} USING THE DOT NOTATION
        for k in cnames:
            row[k] = self.data[k][coord]
        for c, e in zip(coord, self.edges):
            row[e.name] = e.domain.partitions[c]
        m[coord] = accessor(row, Null, Null)  # DUMMY Null VALUES BECAUSE I DO NOT KNOW WHAT TO DO

    self.select.append(window)
    return self
def __init__(
    self,
    rollover_field,     # the FIELD with a timestamp to use for determining which index to push to
    rollover_interval,  # duration between roll-over to new index
    rollover_max,       # remove old indexes, do not add old records
    schema,             # es schema
    queue_size=10000,   # number of documents to queue in memory
    batch_size=5000,    # number of documents to push at once
    typed=None,         # indicate if we are expected typed json
    kwargs=None         # plus additional ES settings
):
    # NOTE(review): `!= None` / `== None` (rather than `is`) appears deliberate —
    # kwargs is presumably a mo-dots Data whose missing members compare equal to
    # None; `is not None` would change behavior — confirm before "fixing".
    if kwargs.tjson != None:
        Log.error("not expected")  # tjson IS A RETIRED SETTING; CALLER SHOULD USE typed
    if typed == None:
        Log.error("not expected")  # typed MUST BE EXPLICITLY PROVIDED
    schema.settings.index.max_result_window = 100000  # REQUIRED FOR ACTIVEDATA NESTED QUERIES
    schema.settings.index.max_inner_result_window = 100000  # REQUIRED FOR ACTIVEDATA NESTED QUERIES
    self.settings = kwargs
    self.locker = Lock("lock for rollover_index")
    # COMPILE rollover_field INTO AN ACCESSOR FUNCTION OVER DOCUMENTS
    self.rollover_field = jx.get(rollover_field)
    # NORMALIZE DURATIONS, AND RECORD THEM BACK ONTO self.settings
    self.rollover_interval = self.settings.rollover_interval = Duration(
        rollover_interval)
    self.rollover_max = self.settings.rollover_max = Duration(rollover_max)
    self.known_queues = {}  # MAP DATE TO INDEX
    self.cluster = elasticsearch.Cluster(self.settings)
def __init__(self, name, db=None, uid=UID, kwargs=None):
    """
    :param name: NAME FOR THIS TABLE
    :param db: THE DB TO USE
    :param uid: THE UNIQUE INDEX FOR THIS TABLE
    :return: HANDLE FOR TABLE IN db
    """
    global _config
    if db:
        self.db = db
    else:
        # NO DB GIVEN: CREATE A FRESH in-memory/default Sqlite INSTANCE
        self.db = db = Sqlite()

    if not _config:
        # REGISTER sqlite AS THE DEFAULT CONTAINER TYPE
        # NOTE(review): import is deferred to avoid a circular import with
        # jx_base.container — presumably; confirm before moving to file top
        from jx_base.container import config as _config

        if not _config.default:
            _config.default = {"type": "sqlite", "settings": {"db": db}}

    self.sf = Snowflake(fact=name, uid=uid, db=db)
    self._next_guid = generateGuid()  # GUID PREFIX FOR ROWS ADDED BY THIS INSTANCE
    self._next_uid = 1
    self._make_digits_table()
    # ACCESSOR FUNCTION TO EXTRACT THE UID TUPLE FROM A DOCUMENT
    self.uid_accessor = jx.get(self.sf.uid)
def __init__(self, rollover_field, rollover_interval, rollover_max, queue_size=10000, batch_size=5000, kwargs=None):
    """
    :param rollover_field: the FIELD with a timestamp to use for determining which index to push to
    :param rollover_interval: duration between roll-over to new index
    :param rollover_max: remove old indexes, do not add old records
    :param queue_size: number of documents to queue in memory
    :param batch_size: number of documents to push at once
    :param kwargs: plus additional ES settings
    :return:
    """
    self.settings = kwargs
    self.locker = Lock("lock for rollover_index")
    # COMPILE rollover_field INTO AN ACCESSOR FUNCTION OVER DOCUMENTS
    self.rollover_field = jx.get(rollover_field)
    # NOTE(review): durations are read from kwargs, not from the named
    # parameters — presumably an @override-style decorator keeps them in
    # sync; confirm before relying on the named parameters here
    self.rollover_interval = self.settings.rollover_interval = Duration(
        kwargs.rollover_interval)
    self.rollover_max = self.settings.rollover_max = Duration(
        kwargs.rollover_max)
    self.known_queues = {}  # MAP DATE TO INDEX
    self.cluster = elasticsearch.Cluster(self.settings)
def search(self, query):
    """
    SIMULATE AN ES FILTERED SEARCH OVER THE IN-MEMORY DOCUMENTS
    ONLY query.query.filtered.filter AND query.fields ARE HONOURED
    """
    query = to_data(query)
    matches = jx.get(query.query.filtered.filter)

    # KEEP ONLY DOCUMENTS PASSING THE FILTER, IN ES HIT SHAPE
    hits = [
        {"_id": doc_id, "_source": doc}
        for doc_id, doc in self.data.items()
        if matches(doc)
    ]
    filtered = list_to_data(hits)

    if not query.fields:
        # NO FIELD SELECTION: RETURN FULL _source HITS
        return dict_to_data({"hits": {"total": len(filtered), "hits": filtered}})

    # PROJECT EACH HIT DOWN TO THE REQUESTED FIELDS
    projected = [
        {
            "_id": d._id,
            "fields": unwrap(jx.select([unwrap(d._source)], query.fields)[0]),
        }
        for d in filtered
    ]
    return dict_to_data({"hits": {"total": len(filtered), "hits": projected}})
def __init__(self, name, db=None, uid=UID, kwargs=None):
    """
    :param name: NAME FOR THIS TABLE
    :param db: THE DB TO USE
    :param uid: THE UNIQUE INDEX FOR THIS TABLE
    :return: HANDLE FOR TABLE IN db
    """
    global _config
    if isinstance(db, Sqlite):
        self.db = db
    else:
        # db IS PRESUMABLY A SETTINGS OBJECT (OR None): BUILD THE Sqlite WRAPPER
        self.db = db = Sqlite(db)

    if not _config:
        # REGISTER sqlite AS THE DEFAULT CONTAINER TYPE
        # NOTE(review): deferred import — presumably avoids a circular import
        # with jx_base.container; confirm before moving to file top
        from jx_base.container import config as _config

        if not _config.default:
            _config.default = {"type": "sqlite", "settings": {"db": db}}

    ns = Namespace(db=db)
    self.facts = ns.create_or_replace_facts(fact_name=name)
    self._next_guid = generateGuid()  # GUID PREFIX FOR ROWS ADDED BY THIS INSTANCE
    self._next_uid = 1
    self._make_digits_table()
    # ACCESSOR FUNCTION TO EXTRACT THE UID TUPLE FROM A DOCUMENT
    self.uid_accessor = jx.get(uid)
def __init__(self, es=None, id_info=None):
    """
    HOLD AN ES HANDLE PLUS COMPILED ACCESSORS FOR DOCUMENT id AND version
    WHEN es IS PROVIDED, PULL ITS PROPERTIES INTO A FLAT schema DICT
    """
    self.es = es
    self.id_info = id_info
    self.get_id = jx.get(id_info.field)
    self.get_version = jx.get(id_info.version)

    if not es:
        # NO CLUSTER: EMPTY SCHEMA
        self.schema = {}
        return

    _schema = Data()
    columns = parse_properties(es.settings.alias, ".", ROOT_PATH, es.get_properties())
    for column in columns:
        # INNER OBJECTS GET A PLACEHOLDER; LEAF COLUMNS ARE STORED DIRECTLY
        is_inner = column.es_type in (OBJECT, NESTED)
        _schema[column.name] = {} if is_inner else column
    self.schema = unwrap(_schema)
def __init__(
    self,
    rollover_field,     # the FIELD with a timestamp to use for determining which index to push to
    rollover_interval,  # duration between roll-over to new index
    rollover_max,       # remove old indexes, do not add old records
    schema,             # es schema
    queue_size=10000,   # number of documents to queue in memory
    batch_size=5000,    # number of documents to push at once
    typed=None,         # indicate if we are expected typed json
    kwargs=None         # plus additional ES settings
):
    # NOTE(review): `!= None` / `== None` (rather than `is`) appears deliberate —
    # kwargs is presumably a mo-dots Data whose missing members compare equal to
    # None; `is not None` would change behavior — confirm before "fixing".
    if kwargs.tjson != None:
        Log.error("not expected")  # tjson IS A RETIRED SETTING; CALLER SHOULD USE typed
    if typed == None:
        Log.error("not expected")  # typed MUST BE EXPLICITLY PROVIDED
    # NOTE(review): a sibling variant also sets schema.settings.index.max_result_window
    # here; its absence in this version may be intentional or an omission — verify
    schema.settings.index.max_inner_result_window = 100000  # REQUIRED FOR ACTIVEDATA NESTED QUERIES
    self.settings = kwargs
    self.locker = Lock("lock for rollover_index")
    # COMPILE rollover_field INTO AN ACCESSOR FUNCTION OVER DOCUMENTS
    self.rollover_field = jx.get(rollover_field)
    # NORMALIZE DURATIONS, AND RECORD THEM BACK ONTO self.settings
    self.rollover_interval = self.settings.rollover_interval = Duration(rollover_interval)
    self.rollover_max = self.settings.rollover_max = Duration(rollover_max)
    self.known_queues = {}  # MAP DATE TO INDEX
    self.cluster = elasticsearch.Cluster(self.settings)
def search(self, query):
    """
    SIMULATE AN ES FILTERED SEARCH OVER THE IN-MEMORY DOCUMENTS
    ONLY query.query.filtered.filter AND query.fields ARE HONOURED
    """
    query = wrap(query)
    matches = jx.get(query.query.filtered.filter)

    # KEEP ONLY DOCUMENTS PASSING THE FILTER, IN ES HIT SHAPE
    filtered = wrap([
        {"_id": doc_id, "_source": doc}
        for doc_id, doc in self.data.items()
        if matches(doc)
    ])

    if not query.fields:
        # NO FIELD SELECTION: RETURN FULL _source HITS
        return wrap({"hits": {"total": len(filtered), "hits": filtered}})

    # PROJECT EACH HIT DOWN TO THE REQUESTED FIELDS
    projected = [
        {
            "_id": d._id,
            "fields": unwrap(jx.select([unwrap(d._source)], query.fields)[0]),
        }
        for d in filtered
    ]
    return wrap({"hits": {"total": len(filtered), "hits": projected}})
def update(self, command):
    """
    EXPECTING command == {"set":term, "clear":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS A JSON EXPRESSION FILTER
    """
    command = to_data(command)
    fields_to_clear = listwrap(command["clear"])
    assignments = command.set.items()
    matches = jx.get(command.where)

    for row in self.data:
        if not matches(row):
            continue
        # CLEAR FIRST, THEN APPLY ASSIGNMENTS (SAME ORDER AS BEFORE)
        for field in fields_to_clear:
            row[field] = None
        for field, new_value in assignments:
            row[field] = new_value
def window(self, window):
    """
    ANNOTATE THIS CUBE WITH A NEW COLUMN COMPUTED BY window.value
    (SET-OP ONLY: window.edges AND window.sort ARE NOT SUPPORTED)

    :param window: clause with .name (new column), .value (expression)
    :return: self, FOR CHAINING
    """
    if window.edges or window.sort:
        raise NotImplementedError()
    from jx_python import jx

    # SET OP
    # FIX: dict views are not subscriptable in Python 3; materialize first
    canonical = list(self.data.values())[0]
    accessor = jx.get(window.value)
    cnames = list(self.data.keys())

    # ANNOTATE EXISTING CUBE WITH NEW COLUMN
    m = self.data[window.name] = Matrix(dims=canonical.dims)
    for coord in canonical._all_combos():
        row = Data()  # IT IS SAD WE MUST HAVE A Data(), THERE ARE {"script": expression} USING THE DOT NOTATION
        for k in cnames:
            row[k] = self.data[k][coord]
        for c, e in zip(coord, self.edges):
            row[e.name] = e.domain.partitions[c]
        m[coord] = accessor(row, Null, Null)  # DUMMY Null VALUES BECAUSE I DO NOT KNOW WHAT TO DO

    self.select.append(window)
    return self
def _index_columns(self, columns):
    # INDEX ALL COLUMNS, ESPECIALLY THOSE FUNCTION RESULTS
    # RETURNS ONE {value -> set(row_id)} INDEX PER REQUESTED COLUMN,
    # BUILDING AND CACHING ANY INDEX NOT SEEN BEFORE
    result = [None] * len(columns)
    for i, col in enumerate(columns):
        # FAST PATH: INDEX ALREADY CACHED UNDER THE EXPRESSION ITSELF
        cached = self._index.get(col.value, None)
        if cached is not None:
            result[i] = cached
            continue

        # SLOW PATH: LOOK UP (OR BUILD) UNDER THE CANONICAL JSON KEY
        cache_key = value2json(col.value.__data__(), sort_keys=True)
        cached = self._index.get(cache_key, None)
        result[i] = cached
        if cached is not None:
            continue

        fresh = self._index[cache_key] = {}
        result[i] = fresh
        accessor = jx.get(col.value)
        # key OF _unique_index IS NOT NEEDED; ONLY THE ROW ID
        for row_id in self._unique_index.values():
            value = accessor(self._source[row_id])
            fresh.setdefault(value, set()).add(row_id)
    return result
def _index_columns(self, columns):
    # INDEX ALL COLUMNS, ESPECIALLY THOSE FUNCTION RESULTS
    # RETURNS ONE {value -> set(row_id)} INDEX PER REQUESTED COLUMN,
    # BUILDING AND CACHING ANY INDEX NOT SEEN BEFORE
    output = [None] * len(columns)
    for position, select in enumerate(columns):
        # TRY THE EXPRESSION OBJECT FIRST, THEN ITS CANONICAL JSON FORM
        found = self._index.get(select.value, None)
        if found is None:
            json_key = value2json(select.value.__data__(), sort_keys=True)
            found = self._index.get(json_key, None)
            if found is None:
                # NOT CACHED YET: SCAN ALL ROWS AND BUILD THE VALUE INDEX
                found = self._index[json_key] = {}
                getter = jx.get(select.value)
                for _, row_num in self._unique_index.items():
                    val = getter(self._source[row_num])
                    bucket = found.get(val)
                    if bucket is None:
                        bucket = found[val] = set()
                    bucket.add(row_num)
        output[position] = found
    return output