class RepoManager(object):
    """Manage all repositories in track.

    Periodically reads the list of SCM repositories from axops, removes
    workspaces that are no longer tracked, fetches every tracked repository
    and publishes template-update / CI / GC events for changed branch heads.
    """

    def __init__(self, concurrency, interval):
        """Create the manager.

        :param concurrency: maximum number of worker threads used to update
            repositories in parallel.
        :param interval: number of seconds to sleep between two scans.
        """
        self.axdb_client = AxdbClient()
        self.axops_client = AxopsClient()
        self.concurrency = concurrency
        self.interval = interval

    def run(self):
        """Create workspaces and perform initial/incremental fetch.

        Loops forever; each iteration is one full scan followed by a sleep of
        ``self.interval`` seconds, whether or not the scan succeeded.
        """
        while True:
            logger.info('Start repository scan ...')
            try:
                self.connect()
                repos, has_change = self.synchronize()
                with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
                    futures = [
                        executor.submit(self.update_repo, **repo)
                        for repo in repos
                    ]
                    for future in as_completed(futures):
                        try:
                            # Always retrieve the result so that a failure in
                            # any worker is logged, even after a change has
                            # already been detected by another worker.
                            if future.result():
                                has_change = True
                        except Exception as e:
                            logger.warning(
                                'Unexpected exception occurred during processing: %s', e)
                # Notify UI backend about a change in repos
                if has_change:
                    key = '{}:repos_updated'.format(NAMESPACE)
                    redis_client.set(key, value=str(int(time.time())))
            except Exception as e:
                logger.warning('Repository scan failed: %s', str(e))
            else:
                logger.info('Repository scan completed\n')
            finally:
                time.sleep(self.interval)

    @retry(wait_fixed=5000)
    def connect(self):
        """Connect to axops.

        :raises ConnectionError: when the ping fails; the ``retry`` decorator
            is configured with ``wait_fixed=5000``.
        """
        connected = self.axops_client.ping()
        if not connected:
            msg = 'Unable to connect to axops'
            logger.warning(msg)
            raise ConnectionError(msg)

    def synchronize(self):
        """Synchronize all repos.

        Purges branch heads, workspace and caches of every repository found
        on disk that is no longer tracked, then sends a GC event if anything
        was removed.

        :return: tuple ``(repos, has_change)`` where ``repos`` is the list of
            tracked repository descriptions and ``has_change`` tells whether
            at least one untracked repository was removed.
        """
        logger.info('Synchronizing repositories ...')

        # Get all repos
        repos = self.get_all_repos()
        logger.info('%s repositories currently in track', len(repos))

        # Get untracked repos currently on disk
        untracked_repos = self.get_untracked_repos(repos)
        logger.info('%s untracked repositories found on disk', len(untracked_repos))

        for repo, workspace in untracked_repos.items():
            # Purge all branch heads
            logger.info('Purging branch heads (repo: %s) ...', repo)
            self.axdb_client.purge_branch_heads(repo)
            # Delete workspace
            logger.info('Deleting workspace (path: %s) ...', workspace)
            shutil.rmtree(workspace)
            # Invalidate caches
            logger.info('Invalidating caches (workspace: %s) ...', workspace)
            # Raw string keeps the exact same pattern text without relying on
            # the invalid '\:' escape sequence.
            # NOTE(review): this pattern is regex-shaped; if redis_client.keys
            # forwards to Redis KEYS (glob-style matching) it will not match
            # the '<NAMESPACE>:<workspace>' keys written elsewhere — confirm.
            key_pattern = r'^{}\:{}.*$'.format(NAMESPACE, workspace)
            for k in redis_client.keys(key_pattern):
                logger.debug('Invalidating cache (key: %s) ...', k)
                redis_client.delete(k)

        # Send event to trigger garbage collection from axops
        if untracked_repos:
            kafka_client = ProducerClient()
            try:
                ci_event = {
                    'Op': "gc",
                    'Payload': {
                        'details': "Repo or branch get deleted."
                    }
                }
                kafka_client.send(AxSettings.TOPIC_GC_EVENT,
                                  key=AxSettings.TOPIC_GC_EVENT,
                                  value=ci_event,
                                  timeout=120)
            finally:
                # Release the producer even if sending fails.
                kafka_client.close()

        return repos, len(untracked_repos) > 0

    def get_all_repos(self):
        """Retrieve all repos from axops.

        Repositories are de-duplicated on ``(vendor, owner, name)``; an entry
        already registered with the ``https`` protocol is never overwritten.

        :return: list of dicts whose keys match the keyword parameters of
            :meth:`update_repo`.
        """
        tools = self.axops_client.get_tools(category='scm')
        repos = {}
        for tool in tools:
            for repo_url in tool.get('repos', []):
                parsed_url = urlparse(repo_url)
                protocol, vendor = parsed_url.scheme, parsed_url.hostname
                m = re.match(r'/([a-zA-Z0-9-]+)/([a-zA-Z0-9_.-]+)', parsed_url.path)
                if not m:
                    logger.warning('Illegal repo URL: %s, skip', parsed_url)
                    continue
                _, repo_owner, repo_name = parsed_url.path.split('/', maxsplit=2)
                key = (vendor, repo_owner, repo_name)
                if key in repos and repos[key]['protocol'] == 'https':
                    continue
                repos[key] = {
                    'repo_type': tool.get('type'),
                    'vendor': vendor,
                    'protocol': protocol,
                    'repo_owner': repo_owner,
                    'repo_name': repo_name,
                    'username': tool.get('username'),
                    'password': tool.get('password'),
                    'use_webhook': tool.get('use_webhook', False)
                }
        return list(repos.values())

    @staticmethod
    def get_untracked_repos(repos):
        """Get all untracked repos.

        :param repos: list of tracked repository descriptions (dicts with
            ``vendor``, ``repo_owner`` and ``repo_name`` keys).
        :return: dict mapping the value returned by ``GitClient.get_remote()``
            (presumably the remote URL — verify) to the workspace path, for
            every workspace under ``BASE_DIR`` that is not expected.
        """
        # Construct set of expected workspaces
        expected_workspaces = {
            RepoManager.get_repo_workspace(repo['vendor'],
                                           repo['repo_owner'],
                                           repo['repo_name'])
            for repo in repos
        }

        # Construct list of all workspaces currently on disk: every directory
        # that contains a '.git' directory; the 5-character '/.git' suffix is
        # stripped to obtain the workspace path.
        workspaces = [
            entry[0][:-5]
            for entry in os.walk(BASE_DIR)
            if entry[0].endswith('/.git')
        ]

        # Construct mapping of untracked repos
        untracked_repos = {}
        for workspace in workspaces:
            if workspace not in expected_workspaces:
                client = GitClient(path=workspace)
                repo = client.get_remote()
                untracked_repos[repo] = workspace
        return untracked_repos

    @staticmethod
    def get_repo_workspace(repo_vendor, repo_owner, repo_name):
        """Return the workspace path ``BASE_DIR/<vendor>/<owner>/<name>``."""
        return '{}/{}/{}/{}'.format(BASE_DIR, repo_vendor, repo_owner, repo_name)

    @staticmethod
    def get_repo_url(protocol, repo_vendor, repo_owner, repo_name):
        """Return the repository URL ``<protocol>://<vendor>/<owner>/<name>``."""
        return '{}://{}/{}/{}'.format(protocol, repo_vendor, repo_owner, repo_name)

    @staticmethod
    def update_yaml(repo_client, kafka_client, repo, branch, head):
        """Using Kafka to send a event to axops to update the yamls in the axdb.

        :param repo_client: client exposing ``get_files``.
        :param kafka_client: producer exposing ``send``.
        :param repo: repository URL.
        :param branch: branch name.
        :param head: commit whose YAML files are published.
        :return: number of YAML files sent, or ``-1`` when the files could
            not be read. An update is sent even when no file was found.
        """
        logger.info("Update yaml %s, %s, %s", repo, branch, head)
        try:
            yaml_contents = repo_client.get_files(commit=head,
                                                  subdir=TEMPLATE_DIR,
                                                  filter_yaml=True)
        except Exception as e:
            logger.error("Failed to obtain YAML files: %s", str(e))
            return -1

        # Computed before sending so that an unusable result (e.g. None)
        # fails before any event is published.
        yaml_count = len(yaml_contents)

        # This is a partition key defined as RepoName$$$$BranchName.
        # The key is used by Kafka partition, which means it allows concurrency
        # if the events are for different repo/branch
        key = '{}$$$${}'.format(repo, branch)
        payload = {
            'Op': 'update',
            'Payload': {
                'Revision': head,
                'Content': [v['content'] for v in yaml_contents] if yaml_contents else []
            }
        }
        kafka_client.send('devops_template', key=key, value=payload, timeout=120)
        logger.info("Updated YAML %s files (repo: %s, branch: %s)",
                    yaml_count, repo, branch)
        return yaml_count

    def update_repo(self, repo_type, vendor, protocol, repo_owner, repo_name,
                    username, password, use_webhook):
        """Update a repo.

        Examples for the input variables:
            BASE_DIR: /ax/data/repos
            repo_type: github
            vendor: github.com
            protocol: https
            repo_owner: argo
            repo_name: prod.git

        :return: ``True`` when at least one branch head was added, updated or
            deleted, ``False`` otherwise.
        """
        workspace = self.get_repo_workspace(vendor, repo_owner, repo_name)
        url = self.get_repo_url(protocol, vendor, repo_owner, repo_name)
        kafka_client = ProducerClient()
        try:
            return self._scan_repo(kafka_client, workspace, url, repo_type,
                                   username, password, use_webhook)
        finally:
            # Close the producer on failure as well as on success so that a
            # failing scan does not leak a client on every cycle.
            kafka_client.close()

    def _scan_repo(self, kafka_client, workspace, url, repo_type, username,
                   password, use_webhook):
        """Fetch one repository and publish events for changed branch heads."""
        is_first_fetch = False
        do_send_gc_event = False

        if not os.path.isdir(workspace):
            os.makedirs(workspace)
            # If we recreate the workspace, we need to purge all branch heads of this repo
            self.axdb_client.purge_branch_heads(url)

        logger.info("Start scanning repository (%s) ...", url)
        if repo_type == ScmVendors.CODECOMMIT:
            client = CodeCommitClient(path=workspace, repo=url,
                                      username=username, password=password)
        else:
            client = GitClient(path=workspace, repo=url, username=username,
                               password=password,
                               use_permanent_credentials=True)

        # Even if there is no change, performing a fetch is harmless but has a benefit
        # that, in case the workspace is destroyed without purging the history, we can
        # still update the workspace to the proper state
        logger.info("Start fetching ...")
        client.fetch()

        # Retrieve all previous branch heads and construct hash table
        prev_heads = self.axdb_client.get_branch_heads(url)
        logger.info("Have %s branch heads (repo: %s) from previous scan",
                    len(prev_heads), url)
        if len(prev_heads) == 0:
            is_first_fetch = True
            logger.debug("This is an initial scan as no previous heads were found")
        prev_heads_map = {
            (prev_head['repo'], prev_head['branch']): prev_head['head']
            for prev_head in prev_heads
        }

        # Retrieve all current branch heads
        current_heads = client.get_remote_heads()
        logger.info("Have %s branch heads (repo: %s) from current scan",
                    len(current_heads), url)
        current_heads = sorted(current_heads,
                               key=lambda v: v['commit_date'],
                               reverse=is_first_fetch)

        # Find out which branch heads need to be updated. Entries popped from
        # prev_heads_map still exist; whatever remains afterwards was deleted.
        heads_to_update = []
        heads_for_event = []
        for current_head in current_heads:
            head = current_head['commit']
            branch = current_head['reference'].replace('refs/heads/', '')
            previous_head = prev_heads_map.pop((url, branch), None)
            if head != previous_head:
                event = {'repo': url, 'branch': branch, 'head': head}
                heads_to_update.append(event)
                if previous_head is None:
                    logger.info(
                        "New branch detected (branch: %s, current head: %s)",
                        branch, head)
                else:
                    logger.info(
                        "Existing ranch head updated (branch: %s, previous: %s, current: %s)",
                        branch, previous_head, head)
                    # Send CI event in case of policy
                    # NOTE(review): CI events are queued only for updates to
                    # already-known branches, not for newly detected ones —
                    # confirm this is intended.
                    heads_for_event.append(event.copy())

        if prev_heads_map:
            logger.info("There are %s get deleted from repo: %s",
                        prev_heads_map.keys(), url)
            do_send_gc_event = True
            for key in prev_heads_map:
                self.axdb_client.purge_branch_head(repo=key[0], branch=key[1])

        # Invalidate cache if there is head update or branch deleted
        if heads_to_update or prev_heads_map:
            cache_key = '{}:{}'.format(NAMESPACE, workspace)
            logger.info('Invalidating cache (key: %s) ...', cache_key)
            if redis_client.exists(cache_key):
                redis_client.delete(cache_key)

        # Update YAML contents; a branch head is only recorded when its YAML
        # files were published, so a failed branch is retried on the next scan.
        count = 0
        for event in heads_to_update:
            res_count = RepoManager.update_yaml(repo_client=client,
                                                kafka_client=kafka_client,
                                                repo=url,
                                                branch=event['branch'],
                                                head=event['head'])
            if res_count >= 0:
                self.axdb_client.set_branch_head(**event)
                count += res_count

        logger.info(
            "Updated %s YAML files (template/policy) for %s branches (repo: %s)",
            count, len(heads_to_update), url)
        logger.info("Updated %s branch heads (repo: %s)",
                    len(heads_to_update), url)

        # If garbarge collection needed due to branch or repo deletion
        if do_send_gc_event:
            logger.info(
                "Send gc event so that axops can garbage collect deleted branch / repo"
            )
            ci_event = {
                'Op': "gc",
                'Payload': {
                    'details': "Repo or branch get deleted."
                }
            }
            kafka_client.send(AxSettings.TOPIC_GC_EVENT,
                              key=AxSettings.TOPIC_GC_EVENT,
                              value=ci_event,
                              timeout=120)

        # If webhook is disabled, we need to send CI events
        if not use_webhook:
            for event in heads_for_event:
                commit = client.get_commit(event['head'])
                ci_event = {
                    'Op': "ci",
                    'Payload': {
                        'author': commit['author'],
                        'branch': event['branch'],
                        'commit': commit['revision'],
                        'committer': commit['committer'],
                        'date': datetime.datetime.fromtimestamp(
                            commit['date']).strftime('%Y-%m-%dT%H:%M:%S'),
                        'description': commit['description'],
                        'repo': commit['repo'],
                        'type': "push",
                        'vendor': repo_type
                    }
                }
                # NOTE(review): CI events go to the "devops_template" topic
                # while the log line below mentions devops_ci_event — confirm
                # the topic is the intended one.
                kafka_client.send("devops_template",
                                  key="{}$$$${}".format(event['repo'],
                                                        event['branch']),
                                  value=ci_event,
                                  timeout=120)
            logger.info('Webhook not enabled, send %s devops_ci_event events',
                        len(heads_for_event))

        logger.info('Successfully scanned repository (%s)', url)
        return len(heads_to_update) > 0 or len(prev_heads_map) > 0
class JobScheduler(object):
    """Load cron policies from AxOps into a background scheduler.

    Each cron event of an enabled policy becomes one scheduled job that
    creates a service through AxOps when it fires.
    """

    def __init__(self, axops_host=None):
        """Create the scheduler.

        :param axops_host: host passed through to ``AxopsClient``.
        """
        # Serializes refresh_scheduler() across threads.
        self._schedule_lock = threading.Lock()
        self.axops_client = AxopsClient(host=axops_host)
        self.scheduler = BackgroundScheduler()
        self.event_notification_client = EventNotificationClient(
            FACILITY_AX_SCHEDULER)

    def init(self):
        """
        Init Job Scheduler. Check access to AxOps.

        Pings AxOps up to 20 times, 10 seconds apart. On the first successful
        ping the scheduler is refreshed; if every ping fails the process
        exits with status 1.
        """
        for attempt in range(1, 21):
            if self.axops_client.ping():
                self.refresh_scheduler()
                return
            logger.info("JobScheduler cannot ping AxOps. Count: %s", attempt)
            time.sleep(10)
        logger.error(
            "[Init] scheduler failed to ping AxOps after 20 tries. Exit.")
        sys.exit(1)

    def refresh_scheduler(self):
        """
        Refresh the job scheduler. The major functionality of this service.
        Read all the cron policies from AxOps, then load the schedules into
        the job scheduler.

        :return: ``{}`` when this call performed the refresh, or
            ``{'Details': 'Instant return'}`` when another thread held the
            lock for more than 2 seconds.
        """
        # Try to acquire lock for 2 seconds
        if not self._schedule_lock.acquire(timeout=2):
            # NOTE(review): despite the "Instant return" wording, this blocks
            # until the in-flight refresh releases the lock — confirm that
            # waiting here is intended.
            with self._schedule_lock:
                logger.info(
                    "Some other thread is refreshing the scheduler. Instant return."
                )
                return {'Details': 'Instant return'}

        try:
            scheduler = BackgroundScheduler()
            logger.info("Start refreshing the scheduler.")
            for policy in self.axops_client.get_policy(enabled=True):
                self.add_policy(policy, scheduler)
            # Scheduler swap: the new scheduler is fully populated before the
            # old one is stopped.
            self.stop_scheduler()
            self.scheduler = scheduler
            self.scheduler.start()
            logger.info(
                "Successfully finish refreshing the scheduler. \n%s",
                AxPrettyPrinter().pformat(self.get_schedules()))
            return {}
        finally:
            self._schedule_lock.release()

    def add_policy(self, policy, scheduler):
        """
        Add a schedule into scheduler based on policy. Ignore exceptions (for now).

        Every ``on_cron`` event of the policy is added as one cron job. Any
        failure is logged and reported to the notification center; it is
        never propagated to the caller.

        :param policy: raw policy as returned by AxOps; it is not modified.
        :param scheduler: scheduler the jobs are added to.
        """
        try:
            policy_json = policy_schema(policy)
            policy_id = policy_json['id']
            event_list = policy_json['when']
            logger.info("Processing policy, %s", policy_id)
            for event in event_list:
                if event.get('event', None) != 'on_cron':
                    continue
                event_json = schedule_schema(event)
                # Parse the cron string; split() tolerates repeated spaces
                # and tabs between the five fields.
                cron_str = event_json['schedule'].split()
                if len(cron_str) != 5:
                    # Raised explicitly (instead of an assert statement) so
                    # the validation still runs under "python -O".
                    raise AssertionError("Invalid cron schedule format")
                logger.info("Adding cron event, \n %s",
                            AxPrettyPrinter().pformat(event_json))
                # Add cron job into scheduler
                scheduler.add_job(
                    self.create_service,
                    'cron',
                    id='{}-{}'.format(policy_id, cron_str),
                    args=[policy_json],
                    minute=cron_str[0],
                    hour=cron_str[1],
                    day=cron_str[2],
                    month=cron_str[3],
                    day_of_week=cron_str[4],
                    timezone=event_json['timezone'])
        except MultipleInvalid as e:
            logger.exception("Invalid cron policy format, \n%s. Details: %s",
                             AxPrettyPrinter().pformat(policy), str(e))
            self._notify_policy_error(
                CODE_JOB_SCHEDULER_INVALID_POLICY_DEFINITION, policy)
        except AssertionError as e:
            logger.exception(
                "Invalid cron policy format, \n%s, cron string. Details: %s",
                AxPrettyPrinter().pformat(policy), str(e))
            self._notify_policy_error(
                CODE_JOB_SCHEDULER_INVALID_CRON_EXPRESSION, policy)
        except Exception as e:
            logger.exception(
                "Failed to add event, \n%s into scheduler. Details: %s",
                AxPrettyPrinter().pformat(policy), str(e))
            self._notify_policy_error(
                CODE_JOB_SCHEDULER_CANNOT_ADD_POLICY, policy)

    def _notify_policy_error(self, code, policy):
        """Send a policy error to the notification center (best effort)."""
        try:
            # Work on a shallow copy so the caller's policy is left untouched.
            detail = copy.copy(policy)
            if 'when' in detail:
                detail['when'] = json.dumps(detail['when'])
            self.event_notification_client.send_message_to_notification_center(
                code, detail=detail)
        except Exception:
            logger.exception(
                "Failed to send out alert to notification center.")

    @staticmethod
    def is_matched(target_branches, branch_name):
        """
        Check the regex of target branches can be matched with branch name.

        :param target_branches: iterable of regular expressions.
        :param branch_name: branch name, matched from its beginning.
        :return: ``True`` if at least one pattern matches. A pattern that
            cannot be evaluated is logged and skipped.
        """
        for branch in target_branches:
            try:
                if re.compile(branch).match(branch_name):
                    return True
            except Exception as e:
                logger.exception("Failed to compare using regex. %s", str(e))
        return False

    def create_service(self, policy):
        """
        Create job based on the policy. The payload is tailored for the AxOps
        POST /v1/services. This might get improved in the future.

        :param policy: validated policy the cron job was registered with.
        """
        logger.info(
            "Start triggering job based on cron schedule. Policy info: \n%s",
            AxPrettyPrinter().pformat(policy))
        templates = self.axops_client.get_templates(policy['repo'],
                                                    policy['branch'],
                                                    name=policy['template'])
        if not templates:
            # Same handling as a missing commit below: log and give up rather
            # than fail with an IndexError.
            logger.error(
                "Error retrieving template for cron job, template: %s (repo: %s, branch: %s). Return",
                policy['template'], policy['repo'], policy['branch'])
            return
        service_template = templates[0]

        commit_res = self.axops_client.get_commit_info(repo=policy['repo'],
                                                       branch=policy['branch'],
                                                       limit=1)
        if not commit_res or len(commit_res) != 1:
            logger.error(
                "Error retrieving latest commit info for cron job, commit_info: %s. Return",
                commit_res)
            return

        commit_json = commit_schema(commit_res[0])
        notification_info = policy['notifications']
        commit_info = {
            field: commit_json[field]
            for field in ('revision', 'repo', 'branch', 'author',
                          'committer', 'description', 'date')
        }

        # Deep copy so the session.* entries are not added to the policy that
        # is shared by every run of this job.
        parameters = copy.deepcopy(policy['arguments'])
        parameters['session.commit'] = commit_json['revision']
        parameters['session.branch'] = commit_json['branch']
        parameters['session.repo'] = commit_json['repo']

        service = {
            'template_id': service_template['id'],
            'arguments': parameters,
            'policy_id': policy['id'],
            'commit': commit_info,
        }
        if notification_info:
            service['notifications'] = notification_info

        logger.info("Creating new service with the following payload ...\n%s",
                    AxPrettyPrinter().pformat(service))
        service = self.axops_client.create_service(service)
        logger.info('Successfully created service (id: %s)', service['id'])

    def get_schedules(self):
        """
        Get the scheduled jobs in the current scheduler.
        :return: dict mapping each job id to the job's string form; empty
            when there is no scheduler.
        """
        if not self.scheduler:
            return {}
        return {job.id: str(job) for job in self.scheduler.get_jobs()}

    def stop_scheduler(self, wait=False):
        """
        Stop the current scheduler.
        :param wait: whether to wait for the current running job.
        :return:
        """
        if self.scheduler.running:
            self.scheduler.shutdown(wait=wait)