def __init__(
    self,
    hg=None,          # CONNECT TO hg
    repo=None,        # CONNECTION INFO FOR ES CACHE
    branches=None,    # CONNECTION INFO FOR ES CACHE
    use_cache=False,  # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES
    timeout=30 * SECOND,
    kwargs=None):
    """
    Connect to an hg server, with an optional Elasticsearch cache.

    If `branches` is None, only the branch list is loaded and no ES index is
    created; otherwise an ES index is created/used and two background threads
    are started (index setup, work daemon).

    :param hg: hg connection info (read via kwargs/self.settings.hg)
    :param repo: ES connection info for the revision cache
    :param branches: ES connection info; None disables the ES cache
    :param timeout: request timeout (wrapped in Duration)
    :param kwargs: full settings object; stored as self.settings
    """
    if not _hg_branches:
        _late_imports()

    self.es_locker = Lock()
    self.todo = mo_threads.Queue("todo for hg daemon", max=DAEMON_QUEUE_SIZE)

    self.settings = kwargs
    # FIX: keep self.timeout as a Duration; the original re-assigned the raw
    # `timeout` value at the end of __init__, clobbering this wrapper
    self.timeout = Duration(timeout)

    # VERIFY CONNECTIVITY (Explanation raises a descriptive error on failure)
    with Explanation("Test connect with hg"):
        # FIX: the response was assigned to a local but never used
        http.head(self.settings.hg.url)

    # NOTE(review): `== None` is kept (not `is None`) — presumably so a
    # mo_dots Null also matches; confirm before tightening
    if branches == None:
        self.branches = _hg_branches.get_branches(kwargs=kwargs)
        self.es = None
        return

    self.last_cache_miss = Date.now()
    set_default(repo, {"schema": revision_schema})
    self.es = elasticsearch.Cluster(kwargs=repo).get_or_create_index(kwargs=repo)

    def setup_es(please_stop):
        # best-effort index setup; failures are deliberately swallowed
        with suppress_exception:
            self.es.add_alias()
        with suppress_exception:
            self.es.set_refresh_interval(seconds=1)

    Thread.run("setup_es", setup_es)
    self.branches = _hg_branches.get_branches(kwargs=kwargs)
    Thread.run("hg daemon", self._daemon)
def __init__(
    self,
    hg=None,          # CONNECT TO hg
    repo=None,        # CONNECTION INFO FOR ES CACHE
    branches=None,    # CONNECTION INFO FOR ES CACHE
    use_cache=False,  # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES
    timeout=30 * SECOND,
    kwargs=None
):
    """
    Connect to an hg server, with an optional Elasticsearch cache.

    If `branches` is None, only the branch list is loaded and no ES index is
    created; otherwise an ES index is created/used and two background threads
    are started (index setup, work daemon).

    :param hg: hg connection info (read via kwargs/self.settings.hg)
    :param repo: ES connection info for the revision cache
    :param branches: ES connection info; None disables the ES cache
    :param timeout: request timeout (wrapped in Duration)
    :param kwargs: full settings object; stored as self.settings
    """
    if not _hg_branches:
        _late_imports()

    self.es_locker = Lock()
    self.todo = mo_threads.Queue("todo for hg daemon", max=DAEMON_QUEUE_SIZE)

    self.settings = kwargs
    # FIX: keep self.timeout as a Duration; the original re-assigned the raw
    # `timeout` value at the end of __init__, clobbering this wrapper
    self.timeout = Duration(timeout)

    # VERIFY CONNECTIVITY (Explanation raises a descriptive error on failure)
    with Explanation("Test connect with hg"):
        # FIX: the response was assigned to a local but never used
        http.head(self.settings.hg.url)

    # NOTE(review): `== None` is kept (not `is None`) — presumably so a
    # mo_dots Null also matches; confirm before tightening
    if branches == None:
        self.branches = _hg_branches.get_branches(kwargs=kwargs)
        self.es = None
        return

    self.last_cache_miss = Date.now()
    set_default(repo, {"schema": revision_schema})
    self.es = elasticsearch.Cluster(kwargs=repo).get_or_create_index(kwargs=repo)

    def setup_es(please_stop):
        # best-effort index setup; failures are deliberately swallowed
        with suppress_exception:
            self.es.add_alias()
        with suppress_exception:
            self.es.set_refresh_interval(seconds=1)

    Thread.run("setup_es", setup_es)
    self.branches = _hg_branches.get_branches(kwargs=kwargs)
    Thread.run("hg daemon", self._daemon)
def get_revision(self, revision, locale=None, get_diff=False):
    """
    EXPECTING INCOMPLETE revision OBJECT
    RETURNS revision

    Looks in the Elasticsearch cache first; on a miss (or a record without a
    push date) falls through to querying the hg server directly.
    """
    rev = revision.changeset.id
    if not rev:
        return Null
    elif rev == "None":
        # the literal string "None" is treated as a bad changeset id
        return Null
    elif revision.branch.name == None:
        # NOTE(review): `== None` presumably also matches mo_dots Null — confirm
        return Null
    locale = coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)
    output = self._get_from_elasticsearch(revision, locale=locale, get_diff=get_diff)
    if output:
        if not get_diff:  # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED
            output.changeset.diff = None
        if DEBUG:
            Log.note("Got hg ({{branch}}, {{locale}}, {{revision}}) from ES", branch=output.branch.name, locale=locale, revision=output.changeset.id)
        if output.push.date >= Date.now()-MAX_TODO_AGE:
            # recent revision: queue neighbours for background caching
            self.todo.add((output.branch, listwrap(output.parents)))
            self.todo.add((output.branch, listwrap(output.children)))
        if output.push.date:
            # only a cached record WITH a push date is considered complete
            return output

    # CACHE MISS: NORMALIZE THE BRANCH NAME (branch may be a bare string or
    # an object with a .name)
    found_revision = copy(revision)
    if isinstance(found_revision.branch, (text_type, binary_type)):
        lower_name = found_revision.branch.lower()
    else:
        lower_name = found_revision.branch.name.lower()

    if not lower_name:
        Log.error("Defective revision? {{rev|json}}", rev=found_revision.branch)

    b = found_revision.branch = self.branches[(lower_name, locale)]
    if not b:
        # fall back to the default locale before giving up
        b = found_revision.branch = self.branches[(lower_name, DEFAULT_LOCALE)]
        if not b:
            Log.error("can not find branch ({{branch}}, {{locale}})", branch=lower_name, locale=locale)

    # refresh the branch list when it is stale
    if Date.now() - Date(b.etl.timestamp) > _OLD_BRANCH:
        self.branches = _hg_branches.get_branches(kwargs=self.settings)

    push = self._get_push(found_revision.branch, found_revision.changeset.id)

    # query hg by the 12-character changeset prefix
    url1 = found_revision.branch.url.rstrip("/") + "/json-info?node=" + found_revision.changeset.id[0:12]
    url2 = found_revision.branch.url.rstrip("/") + "/json-rev/" + found_revision.changeset.id[0:12]
    with Explanation("get revision from {{url}}", url=url1, debug=DEBUG):
        raw_rev2 = Null
        try:
            raw_rev1 = self._get_raw_json_info(url1, found_revision.branch)
            raw_rev2 = self._get_raw_json_rev(url2, found_revision.branch)
        except Exception as e:
            if "Hg denies it exists" in e:
                # known-missing revision: keep a stub with just the node id
                raw_rev1 = Data(node=revision.changeset.id)
            else:
                raise e
    output = self._normalize_revision(set_default(raw_rev1, raw_rev2), found_revision, push, get_diff)
    if output.push.date >= Date.now()-MAX_TODO_AGE:
        self.todo.add((output.branch, listwrap(output.parents)))
        self.todo.add((output.branch, listwrap(output.children)))

    if not get_diff:  # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED
        output.changeset.diff = None
    return output
def __init__(
    self,
    hg=None,  # CONNECT TO hg
    repo=None,  # CONNECTION INFO FOR ES CACHE
    use_cache=False,  # True IF WE WILL USE THE ES FOR DOWNLOADING BRANCHES
    timeout=30 * SECOND,
    kwargs=None,
):
    """
    Connect to hg, with a mandatory Elasticsearch cache: one index for
    revisions (repo.index), one for file moves (repo.index + "-moves"),
    plus a branches index (repo.index + "-branches") configured on kwargs.
    Starts two background threads: ES index setup and the work daemon.
    """
    if not _hg_branches:
        _late_imports()

    if not is_text(repo.index):
        Log.error("Expecting 'index' parameter")
    self.repo_locker = Lock()
    self.moves_locker = Lock()
    self.todo = mo_threads.Queue("todo for hg daemon", max=DAEMON_QUEUE_SIZE)
    self.settings = kwargs
    self.timeout = Duration(timeout)
    self.last_cache_miss = Date.now()

    # VERIFY CONNECTIVITY (Explanation raises a descriptive error on failure)
    with Explanation("Test connect with hg"):
        http.head(self.settings.hg.url)

    # the revision index settings double as the template for the branches
    # and moves indexes
    # NOTE(review): assumes mo_dots set_default gives priority to earlier
    # arguments — confirm against the library
    set_default(repo, {
        "type": "revision",
        "schema": revision_schema,
    })
    kwargs.branches = set_default(
        {
            "index": repo.index + "-branches",
            "type": "branch",
        },
        repo,
    )
    moves = set_default(
        {
            "index": repo.index + "-moves",
        },
        repo,
    )

    self.branches = _hg_branches.get_branches(kwargs=kwargs)
    cluster = elasticsearch.Cluster(kwargs=repo)
    self.repo = cluster.get_or_create_index(kwargs=repo)
    self.moves = cluster.get_or_create_index(kwargs=moves)

    def setup_es(please_stop):
        # best-effort index setup; failures are deliberately swallowed
        with suppress_exception:
            self.repo.add_alias()
        with suppress_exception:
            self.moves.add_alias()

        with suppress_exception:
            self.repo.set_refresh_interval(seconds=1)
        with suppress_exception:
            self.moves.set_refresh_interval(seconds=1)

    Thread.run("setup_es", setup_es)
    Thread.run("hg daemon", self._daemon)
def get_revision(self, revision, locale=None, get_diff=False, get_moves=True, after=None):
    """
    EXPECTING INCOMPLETE revision OBJECT
    RETURNS revision

    Looks in the Elasticsearch cache first; on a miss (or a record without a
    push date), rate-limits and queries the hg server directly.
    """
    rev = revision.changeset.id
    if not rev:
        return Null
    elif rev == "None":
        # the literal string "None" is treated as a bad changeset id
        return Null
    elif revision.branch.name == None:
        # NOTE(review): `== None` presumably also matches mo_dots Null — confirm
        return Null
    locale = coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)
    output = self._get_from_elasticsearch(revision, locale=locale, get_diff=get_diff, get_moves=get_moves, after=after)
    if output:
        if not get_diff:  # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED
            output.changeset.diff = None
        if not get_moves:
            output.changeset.moves = None
        DEBUG and Log.note(
            "Got hg ({{branch}}, {{locale}}, {{revision}}) from ES",
            branch=output.branch.name,
            locale=locale,
            revision=output.changeset.id,
        )
        if output.push.date >= Date.now() - MAX_TODO_AGE:
            # recent revision: queue neighbours for background caching
            self.todo.add((output.branch, listwrap(output.parents), None))
            self.todo.add((output.branch, listwrap(output.children), None))
        if output.push.date:
            # only a cached record WITH a push date is considered complete
            return output

    # RATE LIMIT CALLS TO HG (CACHE MISSES)
    # random delay up to 2*WAIT_AFTER_CACHE_MISS seconds since the last miss
    next_cache_miss = self.last_cache_miss + (
        Random.float(WAIT_AFTER_CACHE_MISS * 2) * SECOND)
    self.last_cache_miss = Date.now()
    if next_cache_miss > self.last_cache_miss:
        Log.note(
            "delaying next hg call for {{seconds|round(decimal=1)}} seconds",
            seconds=next_cache_miss - self.last_cache_miss,
        )
        Till(till=next_cache_miss.unix).wait()

    # NORMALIZE THE BRANCH NAME (branch may be a bare string or an object
    # with a .name)
    found_revision = copy(revision)
    if isinstance(found_revision.branch, (text, binary_type)):
        lower_name = found_revision.branch.lower()
    else:
        lower_name = found_revision.branch.name.lower()

    if not lower_name:
        Log.error("Defective revision? {{rev|json}}", rev=found_revision.branch)

    b = found_revision.branch = self.branches[(lower_name, locale)]
    if not b:
        # fall back to the default locale before giving up
        b = found_revision.branch = self.branches[(lower_name, DEFAULT_LOCALE)]
        if not b:
            Log.warning(
                "can not find branch ({{branch}}, {{locale}})",
                branch=lower_name,
                locale=locale,
            )
            return Null

    # refresh the branch list when it is stale
    if Date.now() - Date(b.etl.timestamp) > _hg_branches.OLD_BRANCH:
        self.branches = _hg_branches.get_branches(kwargs=self.settings)

    push = self._get_push(found_revision.branch, found_revision.changeset.id)
    id12 = found_revision.changeset.id[0:12]  # hg accepts the 12-char prefix
    url1 = found_revision.branch.url.rstrip(
        "/") + "/json-info?node=" + id12
    url2 = found_revision.branch.url.rstrip("/") + "/json-rev/" + id12
    url3 = (found_revision.branch.url.rstrip("/") +
            "/json-automationrelevance/" + id12)
    with Explanation("get revision from {{url}}", url=url1, debug=DEBUG):
        raw_rev2 = Null
        automation_details = Null
        try:
            raw_rev1 = self._get_raw_json_info(url1, found_revision.branch)
            raw_rev2 = self._get_raw_json_rev(url2, found_revision.branch)
            automation_details = self._get_raw_json_rev(
                url3, found_revision.branch)
        except Exception as e:
            if "Hg denies it exists" in e:
                # known-missing revision: keep a stub with just the node id
                raw_rev1 = Data(node=revision.changeset.id)
            else:
                raise e

    # pick the changeset matching our node out of the automationrelevance list
    raw_rev3_changeset = first(r for r in automation_details.changesets
                               if r.node[:12] == id12)
    if last(automation_details.changesets) != raw_rev3_changeset:
        # NOTE(review): debugging breadcrumb only; message is intentionally vague
        Log.note("interesting")

    output = self._normalize_revision(
        set_default(raw_rev1, raw_rev2, raw_rev3_changeset),
        found_revision,
        push,
        get_diff,
        get_moves,
    )
    if output.push.date >= Date.now() - MAX_TODO_AGE:
        self.todo.add((output.branch, listwrap(output.parents), None))
        self.todo.add((output.branch, listwrap(output.children), None))
        self.todo.add((output.branch, listwrap(output.backsoutnodes), output.push.date))

    if not get_diff:  # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED
        output.changeset.diff = None
    if not get_moves:
        output.changeset.moves = None
    return output
def _get_from_hg(self, revision, locale=None, get_diff=False, get_moves=True):
    """
    Fetch a revision directly from the hg server (cache-miss path):
    rate-limits, resolves the branch, then combines json-info, json-rev and
    json-automationrelevance into one normalized revision.
    """
    # RATE LIMIT CALLS TO HG (CACHE MISSES)
    # random delay up to 2*WAIT_AFTER_CACHE_MISS seconds since the last miss
    next_cache_miss = self.last_cache_miss + (
        Random.float(WAIT_AFTER_CACHE_MISS * 2) * SECOND)
    self.last_cache_miss = Date.now()
    if next_cache_miss > self.last_cache_miss:
        Log.note(
            "delaying next hg call for {{seconds|round(decimal=1)}} seconds",
            seconds=next_cache_miss - self.last_cache_miss,
        )
        Till(till=next_cache_miss.unix).wait()

    # CLEAN UP BRANCH NAME (branch may be a bare string or an object with .name)
    found_revision = copy(revision)
    if isinstance(found_revision.branch, (text, binary_type)):
        lower_name = found_revision.branch.lower()
    else:
        lower_name = found_revision.branch.name.lower()

    if not lower_name:
        Log.error("Defective revision? {{rev|json}}",
                  rev=found_revision.branch)

    b = found_revision.branch = self.branches[(lower_name, locale)]
    if not b:
        # fall back to the default locale before giving up
        b = found_revision.branch = self.branches[(lower_name, DEFAULT_LOCALE)]
        if not b:
            Log.warning(
                "can not find branch ({{branch}}, {{locale}})",
                branch=lower_name,
                locale=locale,
            )
            return Null

    # REFRESH BRANCHES, IF TOO OLD
    if Date.now() - Date(b.etl.timestamp) > _hg_branches.OLD_BRANCH:
        self.branches = _hg_branches.get_branches(kwargs=self.settings)

    # FIND THE PUSH
    push = self._get_push(found_revision.branch,
                          found_revision.changeset.id)

    id12 = found_revision.changeset.id[0:12]  # hg accepts the 12-char prefix
    base_url = URL(found_revision.branch.url)

    with Explanation("get revision from {{url}}", url=base_url, debug=DEBUG):
        raw_rev2 = Null
        automation_details = Null
        try:
            # NOTE(review): assumes URL supports `/` path joins and `+ dict`
            # for query parameters — confirm against the URL class
            raw_rev1 = self._get_raw_json_info((base_url / "json-info") + {"node": id12})
            raw_rev2 = self._get_raw_json_rev(base_url / "json-rev" / id12)
            automation_details = self._get_raw_json_rev(
                base_url / "json-automationrelevance" / id12)
        except Exception as e:
            if "Hg denies it exists" in e:
                # known-missing revision: keep a stub with just the node id
                raw_rev1 = Data(node=revision.changeset.id)
            else:
                raise e

    # pick the changeset matching our node out of the automationrelevance list
    raw_rev3_changeset = first(r for r in automation_details.changesets
                               if r.node[:12] == id12)
    if last(automation_details.changesets) != raw_rev3_changeset:
        # NOTE(review): debugging breadcrumb only; message is intentionally vague
        Log.note("interesting")

    output = self._normalize_revision(
        set_default(raw_rev1, raw_rev2, raw_rev3_changeset),
        found_revision,
        push,
        get_diff,
        get_moves,
    )
    if output.push.date >= Date.now() - MAX_TODO_AGE:
        # recent revision: queue related revisions for the background daemon
        self.todo.extend([
            (output.branch, listwrap(output.parents), None),
            (output.branch, listwrap(output.children), None),
            (
                output.branch,
                listwrap(output.backsoutnodes),
                output.push.date,
            ),
        ])

    if not get_diff:  # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED
        output.changeset.diff = None
    if not get_moves:
        output.changeset.moves = None
    return output
def get_revision(self, revision, locale=None, get_diff=False, get_moves=True):
    """
    EXPECTING INCOMPLETE revision OBJECT
    RETURNS revision

    Looks in the Elasticsearch cache first; on a miss (or a record without a
    push date), rate-limits and queries the hg server directly.
    """
    rev = revision.changeset.id
    if not rev:
        return Null
    elif rev == "None":
        # the literal string "None" is treated as a bad changeset id
        return Null
    elif revision.branch.name == None:
        # NOTE(review): `== None` presumably also matches mo_dots Null — confirm
        return Null
    locale = coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)
    # NOTE(review): get_moves is NOT forwarded here, unlike get_diff —
    # confirm whether _get_from_elasticsearch accepts/needs it
    output = self._get_from_elasticsearch(revision, locale=locale, get_diff=get_diff)
    if output:
        if not get_diff:  # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED
            output.changeset.diff = None
        if not get_moves:
            output.changeset.moves = None
        DEBUG and Log.note("Got hg ({{branch}}, {{locale}}, {{revision}}) from ES", branch=output.branch.name, locale=locale, revision=output.changeset.id)
        if output.push.date >= Date.now()-MAX_TODO_AGE:
            # recent revision: queue neighbours for background caching
            self.todo.add((output.branch, listwrap(output.parents)))
            self.todo.add((output.branch, listwrap(output.children)))
        if output.push.date:
            # only a cached record WITH a push date is considered complete
            return output

    # RATE LIMIT CALLS TO HG (CACHE MISSES)
    # random delay up to 2*WAIT_AFTER_CACHE_MISS seconds since the last miss
    next_cache_miss = self.last_cache_miss + (Random.float(WAIT_AFTER_CACHE_MISS * 2) * SECOND)
    self.last_cache_miss = Date.now()
    if next_cache_miss > self.last_cache_miss:
        Log.note("delaying next hg call for {{seconds|round(decimal=1)}}", seconds=next_cache_miss - self.last_cache_miss)
        Till(till=next_cache_miss.unix).wait()

    # NORMALIZE THE BRANCH NAME (branch may be a bare string or an object
    # with a .name)
    found_revision = copy(revision)
    if isinstance(found_revision.branch, (text_type, binary_type)):
        lower_name = found_revision.branch.lower()
    else:
        lower_name = found_revision.branch.name.lower()

    if not lower_name:
        Log.error("Defective revision? {{rev|json}}", rev=found_revision.branch)

    b = found_revision.branch = self.branches[(lower_name, locale)]
    if not b:
        # fall back to the default locale before giving up
        b = found_revision.branch = self.branches[(lower_name, DEFAULT_LOCALE)]
        if not b:
            Log.warning("can not find branch ({{branch}}, {{locale}})", branch=lower_name, locale=locale)
            return Null

    # refresh the branch list when it is stale
    if Date.now() - Date(b.etl.timestamp) > _OLD_BRANCH:
        self.branches = _hg_branches.get_branches(kwargs=self.settings)

    push = self._get_push(found_revision.branch, found_revision.changeset.id)

    # query hg by the 12-character changeset prefix
    url1 = found_revision.branch.url.rstrip("/") + "/json-info?node=" + found_revision.changeset.id[0:12]
    url2 = found_revision.branch.url.rstrip("/") + "/json-rev/" + found_revision.changeset.id[0:12]
    with Explanation("get revision from {{url}}", url=url1, debug=DEBUG):
        raw_rev2 = Null
        try:
            raw_rev1 = self._get_raw_json_info(url1, found_revision.branch)
            raw_rev2 = self._get_raw_json_rev(url2, found_revision.branch)
        except Exception as e:
            if "Hg denies it exists" in e:
                # known-missing revision: keep a stub with just the node id
                raw_rev1 = Data(node=revision.changeset.id)
            else:
                raise e
    output = self._normalize_revision(set_default(raw_rev1, raw_rev2), found_revision, push, get_diff, get_moves)
    if output.push.date >= Date.now()-MAX_TODO_AGE:
        self.todo.add((output.branch, listwrap(output.parents)))
        self.todo.add((output.branch, listwrap(output.children)))

    if not get_diff:  # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED
        output.changeset.diff = None
    if not get_moves:
        output.changeset.moves = None
    return output