def _validate_query(self, cfg, query):
    # a query validator is optional; if none is configured, everything passes
    validator = cfg.get("query_validator")
    if validator is None:
        return True

    # resolve the validator name to a function via the QUERY_FILTERS config
    filters = app.config.get("QUERY_FILTERS", {})
    validator_path = filters.get(validator)
    fn = plugin.load_function(validator_path)
    if fn is None:
        msg = "Unable to load query validator for {x}".format(x=validator)
        raise exceptions.ConfigurationException(msg)

    return fn(query)
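# A minimal sketch of how a validator might be configured and written, assuming
# QUERY_FILTERS maps a name to a dotted function path and that the query behaves
# like a dict. The name "size_capped" and the module path are hypothetical.
#
# QUERY_FILTERS = {
#     "size_capped": "myapp.queryfilters.size_capped"
# }

def size_capped(query):
    # example validator: refuse queries asking for an unreasonable page size
    return query.get("size", 10) <= 200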
def _post_filter_search_results(self, cfg, res, unpacked=False):
    # resolve each configured result filter name to a function via QUERY_FILTERS
    filters = app.config.get("QUERY_FILTERS", {})
    result_filter_names = cfg.get("result_filters", [])
    for result_filter_name in result_filter_names:
        fn = plugin.load_function(filters.get(result_filter_name))
        if fn is None:
            msg = "Unable to load result filter for {x}".format(x=result_filter_name)
            raise exceptions.ConfigurationException(msg)
        # apply the result filter
        res = fn(res, unpacked=unpacked)
    return res
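# Hedged example of a result filter compatible with the hook above: it receives
# the ES response (or, when unpacked=True, what is assumed here to be a plain
# list of records) and returns the filtered structure. The "admin" field it
# strips is an assumption for illustration, not a field the code above defines.

def strip_admin_section(res, unpacked=False):
    records = res if unpacked else res.get("hits", {}).get("hits", [])
    for record in records:
        source = record if unpacked else record.get("_source", {})
        source.pop("admin", None)   # drop the assumed admin-only section
    return res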
def _pre_filter_search_query(self, cfg, query):
    # now run the query through the filters
    filters = app.config.get("QUERY_FILTERS", {})
    filter_names = cfg.get("query_filters", [])
    for filter_name in filter_names:
        # because of back-compat, we have to do a few tricky things here...
        # filter may be the name of a filter in the list of query filters
        fn = plugin.load_function(filters.get(filter_name))
        if fn is None:
            msg = "Unable to load query filter for {x}".format(x=filter_name)
            raise exceptions.ConfigurationException(msg)
        # run the filter (its return value is ignored, so filters must
        # modify the query in place)
        fn(query)
    return query
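# Sketch of a query filter usable with the hook above. Since the loop ignores
# the return value of fn(query), a filter must mutate the query in place. The
# "admin.in_doaj" term is illustrative only.

def only_visible_records(query):
    current = query.get("query", {"match_all": {}})
    # wrap the existing query in a bool filter adding the visibility constraint
    query["query"] = {
        "bool": {
            "must": [current],
            "filter": [{"term": {"admin.in_doaj": True}}]
        }
    }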
def do_upgrade(definition, verbose):
    # get the source and target es definitions
    source = definition.get("source")
    target = definition.get("target")
    if source is None:
        source = {
            "host": app.config.get("ELASTIC_SEARCH_HOST"),
            "index": app.config.get("ELASTIC_SEARCH_DB")
        }
    if target is None:
        target = {
            "host": app.config.get("ELASTIC_SEARCH_HOST"),
            "index": app.config.get("ELASTIC_SEARCH_DB"),
            "mappings": False
        }

    sconn = esprit.raw.Connection(source.get("host"), source.get("index"))
    tconn = esprit.raw.Connection(target.get("host"), target.get("index"))

    if verbose:
        print("Source", source)
        print("Target", target)

    # get the defined batch size
    batch_size = definition.get("batch", 500)

    for tdef in definition.get("types", []):
        print("Upgrading", tdef.get("type"))
        batch = []
        total = 0
        first_page = esprit.raw.search(sconn, tdef.get("type"))
        max_docs = first_page.json().get("hits", {}).get("total", 0)    # renamed from "max" to avoid shadowing the builtin
        type_start = datetime.now()

        default_query = {
            "query": {"match_all": {}}
        }

        try:
            for result in esprit.tasks.scroll(sconn, tdef.get("type"),
                                              q=tdef.get("query", default_query),
                                              keepalive=tdef.get("keepalive", "1m"),
                                              page_size=tdef.get("scroll_size", 1000),
                                              scan=True):
                # learn what kind of model we've got
                model_class = MODELS.get(tdef.get("type"))
                original = deepcopy(result)

                if tdef.get("init_with_model", True):
                    # instantiate an object with the data
                    try:
                        result = model_class(**result)
                    except DataStructureException as e:
                        print("Could not create model for {0}, Error: {1}".format(result['id'], str(e)))
                        continue

                for function_path in tdef.get("functions", []):
                    fn = plugin.load_function(function_path)
                    result = fn(result)

                data = result
                _id = result.get("id", "id not specified")
                if isinstance(result, model_class):
                    # run the tasks specified with this object type
                    tasks = tdef.get("tasks", None)
                    if tasks:
                        for func_call, kwargs in tasks.items():
                            getattr(result, func_call)(**kwargs)

                    # run the prep routine for the record
                    try:
                        result.prep()
                    except AttributeError:
                        if verbose:
                            print(tdef.get("type"), result.id, "has no prep method - no pre-save preparation being done")

                    data = result.data
                    _id = result.id

                # add the data to the batch
                data = _diff(original, data)
                if "id" not in data:
                    data["id"] = _id
                data = {"doc": data}
                batch.append(data)
                if verbose:
                    print("added", tdef.get("type"), _id, "to batch update")

                # When we have enough, do some writing
                if len(batch) >= batch_size:
                    total += len(batch)
                    print(datetime.now(), "writing", len(batch), "to", tdef.get("type"), ";", total, "of", max_docs)
                    esprit.raw.bulk(tconn, batch, idkey="doc.id", type_=tdef.get("type"), bulk_type="update")
                    batch = []

                    # do some timing predictions
                    batch_tick = datetime.now()
                    time_so_far = batch_tick - type_start
                    seconds_so_far = time_so_far.total_seconds()
                    estimated_seconds_remaining = ((seconds_so_far * max_docs) / total) - seconds_so_far
                    estimated_finish = batch_tick + timedelta(seconds=estimated_seconds_remaining)
                    print('Estimated finish time for this type {0}.'.format(estimated_finish))

        except esprit.tasks.ScrollTimeoutException:
            # Try to write the part-batch to index
            if len(batch) > 0:
                total += len(batch)
                print(datetime.now(), "scroll timed out / writing", len(batch), "to", tdef.get("type"), ";", total, "of", max_docs)
                esprit.raw.bulk(tconn, batch, idkey="doc.id", type_=tdef.get("type"), bulk_type="update")
                batch = []

        # Write the last part-batch to index
        if len(batch) > 0:
            total += len(batch)
            print(datetime.now(), "final result set / writing", len(batch), "to", tdef.get("type"), ";", total, "of", max_docs)
            esprit.raw.bulk(tconn, batch, idkey="doc.id", type_=tdef.get("type"), bulk_type="update")
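# A hedged example of the definition dict do_upgrade consumes, built only from
# the keys read above (source/target/batch/types; per-type: type, query,
# keepalive, scroll_size, init_with_model, functions, tasks). The hosts, the
# function path and the "set_in_doaj" task are assumptions for illustration.

EXAMPLE_DEFINITION = {
    "source": {"host": "http://localhost:9200", "index": "doaj"},
    "target": {"host": "http://localhost:9200", "index": "doaj", "mappings": False},
    "batch": 500,
    "types": [
        {
            "type": "journal",
            "init_with_model": True,       # wrap each record in its model class
            "keepalive": "10m",            # scroll keepalive passed to esprit
            "scroll_size": 1000,
            "functions": [                 # dotted paths resolved by plugin.load_function
                "myapp.migrate.fix_journal"
            ],
            "tasks": {                     # model method names mapped to their kwargs
                "set_in_doaj": {"value": True}
            }
        }
    ]
}

# do_upgrade(EXAMPLE_DEFINITION, verbose=True)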