def _get_queue(self, row): row = wrap(row) if row.json: row.value, row.json = convert.json2value(row.json), None timestamp = Date(self.rollover_field(wrap(row).value)) if timestamp == None or timestamp < Date.today() - self.rollover_max: return Null rounded_timestamp = timestamp.floor(self.rollover_interval) queue = self.known_queues.get(rounded_timestamp.unix) if queue == None: candidates = jx.run({ "from": self.cluster.get_aliases(), "where": { "regex": { "index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d" } }, "sort": "index" }) best = None for c in candidates: c = wrap(c) c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT) if timestamp > c.date: best = c if not best or rounded_timestamp > best.date: if rounded_timestamp < wrap(candidates[-1]).date: es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings) else: try: es = self.cluster.create_index( create_timestamp=rounded_timestamp, settings=self.settings) es.add_alias(self.settings.index) except Exception, e: if "IndexAlreadyExistsException" not in e: Log.error("Problem creating index", cause=e) return self._get_queue(row) # TRY AGAIN else: es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings) with suppress_exception: es.set_refresh_interval(seconds=60 * 10, timeout=5) self._delete_old_indexes(candidates) queue = self.known_queues[ rounded_timestamp.unix] = es.threaded_queue( max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
def _delete_old_indexes(self, candidates): for c in candidates: timestamp = unicode2Date(c.index[-15:], "%Y%m%d_%H%M%S") if timestamp + self.rollover_interval < Date.today() - self.rollover_max: # Log.warning("Will delete {{index}}", index=c.index) try: self.cluster.delete_index(c.index) except Exception, e: Log.warning("could not delete index {{index}}", index=c.index, cause=e)
def _delete_old_indexes(self, candidates): for c in candidates: timestamp = unicode2Date(c.index[-15:], "%Y%m%d_%H%M%S") if timestamp + self.rollover_interval < Date.today( ) - self.rollover_max: # Log.warning("Will delete {{index}}", index=c.index) try: self.cluster.delete_index(c.index) except Exception, e: Log.warning("could not delete index {{index}}", index=c.index, cause=e)
def _get_queue(self, row): row = wrap(row) if row.json: row.value, row.json = convert.json2value(row.json), None timestamp = Date(self.rollover_field(wrap(row).value)) if timestamp == None or timestamp < Date.today() - self.rollover_max: return Null rounded_timestamp = timestamp.floor(self.rollover_interval) queue = self.known_queues.get(rounded_timestamp.unix) if queue == None: candidates = jx.run({ "from": self.cluster.get_aliases(), "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}}, "sort": "index" }) best = None for c in candidates: c = wrap(c) c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT) if timestamp > c.date: best = c if not best or rounded_timestamp > best.date: if rounded_timestamp < wrap(candidates[-1]).date: es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings) else: try: es = self.cluster.create_index(create_timestamp=rounded_timestamp, settings=self.settings) es.add_alias(self.settings.index) except Exception, e: if "IndexAlreadyExistsException" not in e: Log.error("Problem creating index", cause=e) return self._get_queue(row) # TRY AGAIN else: es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, settings=self.settings) with suppress_exception: es.set_refresh_interval(seconds=60 * 10, timeout=5) self._delete_old_indexes(candidates) queue = self.known_queues[rounded_timestamp.unix] = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
def loop(source, coverage_summary_index, settings, please_stop):
    """
    Scan all indexes behind the `source.index` alias (newest first) and,
    for each, repeatedly pull batches of coverage records still missing
    summary fields, fanning each batch out to NUM_THREAD worker threads
    running process_batch().

    :param source: Elasticsearch settings for the coverage alias
    :param coverage_summary_index: destination index handed to process_batch
    :param settings: job settings (url, batch_size, ...)
    :param please_stop: stop signal; also fired here when all work is done
    """
    try:
        cluster = elasticsearch.Cluster(source)
        aliases = cluster.get_aliases()
        candidates = []
        for pairs in aliases:
            if pairs.alias == source.index:
                candidates.append(pairs.index)
        # Newest first: index names sort by their trailing timestamp
        candidates = jx.sort(candidates, {".": "desc"})

        for index_name in candidates:
            coverage_index = elasticsearch.Index(index=index_name, read_only=False, settings=source)
            # Last 15 chars of the index name are its creation timestamp;
            # only records pushed after that date are considered
            push_date_filter = unicode2Date(coverage_index.settings.index[-15::], elasticsearch.INDEX_DATE_FORMAT)

            while not please_stop:
                # IDENTIFY NEW WORK
                Log.note("Working on index {{index}}", index=index_name)
                coverage_index.refresh()
                todo = http.post_json(settings.url, json={
                    "from": "coverage",
                    "groupby": ["source.file.name", "build.revision12"],
                    "where": {"and": [
                        # records not yet summarized lack these fields
                        {"missing": "source.method.name"},
                        {"missing": "source.file.min_line_siblings"},
                        {"gte": {"repo.push.date": push_date_filter}}
                    ]},
                    "format": "list",
                    "limit": coalesce(settings.batch_size, 100)
                })
                if not todo.data:
                    break  # this index is fully processed; move to next

                queue = Queue("pending source files to review")
                queue.extend(todo.data[0:coalesce(settings.batch_size, 100):])
                threads = [
                    Thread.run(
                        "processor" + unicode(i),
                        process_batch,
                        queue,
                        coverage_index,
                        coverage_summary_index,
                        settings,
                        please_stop=please_stop
                    )
                    for i in range(NUM_THREAD)
                ]

                # ADD STOP MESSAGE
                # NOTE(review): a single STOP message for NUM_THREAD consumers —
                # presumably the project Queue/Thread machinery propagates it so
                # every worker exits; confirm against process_batch
                queue.add(Thread.STOP)

                # WAIT FOR THEM TO COMPLETE
                for t in threads:
                    t.join()

        # All candidate indexes drained: signal the rest of the job to stop
        please_stop.go()
        return
    except Exception, e:
        # Top-level boundary: log and swallow so the caller thread exits cleanly
        Log.warning("Problem processing", cause=e)