def __init__(self):
    """Create the service clients and helpers used by the gateway.

    Nothing is passed in: every collaborator is constructed here, in the
    same order as before (AXDB, AXOPS, AXSYS, Redis, notification client,
    the per-vendor SCM clients, the repo manager and the event trigger).
    """
    # Back-end service clients.
    self.axdb_client = AxdbClient()
    self.axops_client = AxopsClient()
    self.axsys_client = AxsysClient()
    self.redis_client = RedisClient('redis', db=DB_REPORTING)
    self.event_notification_client = EventNotificationClient(FACILITY_GATEWAY)

    # One REST client per hosted SCM vendor, keyed by vendor constant.
    bitbucket = BitBucketClient()
    github = GitHubClient()
    gitlab = GitLabClient()
    self.scm_clients = dict([
        (ScmVendors.BITBUCKET, bitbucket),
        (ScmVendors.GITHUB, github),
        (ScmVendors.GITLAB, gitlab),
    ])

    # Background helpers.
    self.repo_manager = RepoManager(DEFAULT_CONCURRENCY, DEFAULT_INTERVAL)
    self.event_trigger = EventTrigger()
def approval(id):
    """Save an approval result in redis and AXDB, then redirect the user.

    The approval decision arrives as a signed JWT in the ``token`` query
    parameter. The decoded payload must carry ``leaf_id``, ``root_id``,
    ``result``, ``user`` and ``dns``; a ``timestamp`` is added here.

    Flow:
      1. Reject (redirect to an error page) when the approval is no longer
         in ``WAITING`` state or this user has already responded.
      2. Best effort: push the result onto the redis list
         ``<leaf_id>-axapproval``. Failures in steps 1-2 are logged and do
         not prevent step 3.
      3. Persist the result in AXDB. A failure here is logged and re-raised.

    :param id: not used by the body; the name is kept unchanged so that
        callers passing it by keyword keep working.
    :returns: a redirect response to the success or error page on
        ``result['dns']``.
    :raises Exception: whatever ``jwt.decode`` raises for a missing or
        invalid token, or whatever AXDB raises while saving the result.
    """
    token = request.args.get('token', None)
    # NOTE(review): the signing secret is hard-coded. It must match whatever
    # issues the token, so it is left as-is here, but it should come from
    # configuration.
    result = jwt.decode(token, 'ax', algorithms=['HS256'])
    redis_client = RedisClient('redis', db=DB_RESULT)
    result['timestamp'] = int(time.time())

    # Deliberately do NOT log the raw token: it is a signed credential and
    # anyone able to read the logs could replay it to submit an approval.
    logger.info("Decoded approval token to %s", json.dumps(result, indent=2))

    # differentiate key for approval result from the task result
    approval_key = result['leaf_id'] + '-axapproval'

    try:
        logger.info("Setting approval result (%s) to Redis ...", approval_key)
        try:
            state = gateway.axdb_client.get_approval_info(
                root_id=result['root_id'], leaf_id=result['leaf_id'])
            if state and state[0]['result'] != 'WAITING':
                return redirect(
                    "https://{}/error/404/type/ERR_AX_ILLEGAL_OPERATION;msg=The%20link%20is%20no%20longer%20valid."
                    .format(result['dns']))

            if gateway.axdb_client.get_approval_results(
                    leaf_id=result['leaf_id'], user=result['user']):
                return redirect(
                    "https://{}/error/404/type/ERR_AX_ILLEGAL_OPERATION;msg=Response%20has%20already%20been%20submitted."
                    .format(result['dns']))

            # push result to redis (brpop)
            redis_client.rpush(approval_key, value=result, encoder=json.dumps)
        except Exception as exc:
            # Best effort only: the result is still written to AXDB below.
            logger.exception(exc)

        # save result to axdb
        gateway.axdb_client.create_approval_results(
            leaf_id=result['leaf_id'],
            root_id=result['root_id'],
            result=result['result'],
            user=result['user'],
            timestamp=result['timestamp'])
    except Exception as e:
        msg = 'Failed to save approval result to Redis: {}'.format(e)
        logger.error(msg)
        raise
    else:
        logger.info('Successfully saved result to Redis')
        return redirect(
            "https://{}/success/201;msg=Response%20has%20been%20submitted%20successfully."
            .format(result['dns']))
def __init__(self, fixturemgr, redis_host=None, redis_db=None):
    """Set up the request database, the redis notification client and the
    synchronisation primitives of the request processor.

    :param fixturemgr: owning fixture manager; stored as ``self.fixmgr`` and
        handed to the request database.
    :param redis_host: redis host name, optionally in ``host:port`` form.
    :param redis_db: accepted but not used by this constructor; the client
        is always created with ``DB_RESULT``.
        NOTE(review): confirm whether this is intentional.
    """
    self.fixmgr = fixturemgr
    self.requestdb = FixtureRequestDatabase(self.fixmgr)

    port = None
    if redis_host and ':' in redis_host:
        # Split on the LAST colon only. The previous split(':', 2) could
        # yield three parts and crash the two-name unpacking with a
        # ValueError when the host contained more than one colon.
        redis_host, port = redis_host.rsplit(':', 1)
    self.redis_client_notification = RedisClient(host=redis_host, port=port, db=DB_RESULT)

    self.process_interval = common.DEFAULT_PROCESS_INTERVAL
    self._stop = False

    # Condition variable to notify the request processor that it should
    # wake up and process the requests
    self._process_cv = threading.Condition()
    self._events = 0
    self._request_processor_thread = None
    self._processor_lock = threading.Lock()
import datetime
import json
import logging
import requests
from dateutil.tz import tzlocal

from ax.devops.exceptions import UnknownRepository
from ax.devops.redis.redis_client import RedisClient, DB_REPORTING
from . import BaseScmRestClient
from . import TEMPLATE_DIR

logger = logging.getLogger(__name__)

# Module-level redis client bound to the reporting database; it is created
# as a side effect of importing this module.
redis_client = RedisClient('redis', db=DB_REPORTING)


class GitLabClient(BaseScmRestClient):
    """REST API wrapper for GitLab.

    In GitLab, the structure of endpoint is fundamentally different from
    GitHub and BitBucket. GitLab has a project in all its rest endpoints.
    Though currently, it assumes there should be only 1 repository under
    each project; the situation may change in future. Currently, we assume
    project = repository.
    """

    # Maps an integer status code to a status string.
    # NOTE(review): presumably internal job status -> GitLab commit status;
    # confirm against the code that reads this table.
    STATUS_MAPPING = {
        -1: 'failed',
        0: 'success',
        1: 'running'
    }
class Gateway(object):
    """Repo Controller.

    Entry point for SCM related operations: listing repositories, managing
    webhooks (and the ELB in front of them), querying branches and commits
    from the local workspaces under ``BASE_DIR``, copying repository files
    to S3 and creating Jira clients.
    """

    BASE_DIR = '/ax/data/repos'
    BRANCH_CACHE_TTL = 5 * 60  # 5 minutes TTL as we expect we won't finish upgrade within 5 minutes
    NAMESPACE = 'gateway'

    # Read once, when the class body is executed (i.e. at import time).
    CLUSTER_NAME_ID = os.environ.get('AX_CLUSTER')
    CUSTOMER_ID = os.environ.get('AX_CUSTOMER_ID')
    S3_BUCKET_NAME = 'applatix-cluster-{account}-{seq}'.format(account=CUSTOMER_ID, seq=0)
    s3_bucket = boto3.resource('s3').Bucket(S3_BUCKET_NAME)

    def __init__(self):
        """Create the service clients and helpers used by the gateway."""
        self.axdb_client = AxdbClient()
        self.axops_client = AxopsClient()
        self.axsys_client = AxsysClient()
        self.redis_client = RedisClient('redis', db=DB_REPORTING)
        self.event_notification_client = EventNotificationClient(FACILITY_GATEWAY)
        self.scm_clients = {
            ScmVendors.BITBUCKET: BitBucketClient(),
            ScmVendors.GITHUB: GitHubClient(),
            ScmVendors.GITLAB: GitLabClient()
        }
        self.repo_manager = RepoManager(DEFAULT_CONCURRENCY, DEFAULT_INTERVAL)
        self.event_trigger = EventTrigger()

    def get_repos(self, scm_type, url, username, password):
        """Get all repos owned by the user.

        :param scm_type: one of the ``ScmVendors`` constants.
        :param url: repository URL; only used for ``ScmVendors.GIT``.
        :param username: user name (AWS access key id for CodeCommit).
        :param password: password (AWS secret access key for CodeCommit).
        :returns: whatever the vendor client returns for hosted vendors,
            ``{url: url}`` for plain git, a ``{repo_url: repo_url}`` mapping
            for CodeCommit, and ``{}`` for an unknown ``scm_type``.
        :raises AXApiInvalidParam: when the SCM server cannot be reached; a
            notification is sent before raising.
        """
        if scm_type in {ScmVendors.BITBUCKET, ScmVendors.GITHUB, ScmVendors.GITLAB}:
            try:
                repos = self.scm_clients[scm_type].get_repos(username, password)
            except Exception as e:
                logger.warning('Unable to connect to %s: %s', scm_type, e)
                detail = {
                    'type': scm_type,
                    'username': username,
                    # Not every exception carries ``detail``; reading it
                    # unconditionally raised AttributeError from inside this
                    # handler and masked the real connection error.
                    'error': str(getattr(e, 'detail', e))
                }
                self.event_notification_client.send_message_to_notification_center(CODE_CONFIGURATION_SCM_CONNECTION_ERROR,
                                                                                   detail=detail)
                raise AXApiInvalidParam('Cannot connect to %s server' % scm_type)
            else:
                return repos
        elif scm_type == ScmVendors.GIT:
            _, vendor, repo_owner, repo_name = Gateway.parse_repo(url)
            # Scratch directory used only to probe the remote; anything left
            # over from a previous attempt is removed first.
            path = '/tmp/{}/{}/{}'.format(vendor, repo_owner, repo_name)
            if os.path.isfile(path):
                os.remove(path)
            if os.path.isdir(path):
                shutil.rmtree(path)
            os.makedirs(path)
            client = GitClient(path=path, repo=url, username=username, password=password)
            try:
                client.list_remote()
            except Exception as e:
                logger.warning('Unable to connect to git server (%s): %s', url, e)
                detail = {
                    'type': scm_type,
                    'url': url,
                    'username': username,
                    'error': str(e)
                }
                self.event_notification_client.send_message_to_notification_center(CODE_CONFIGURATION_SCM_CONNECTION_ERROR,
                                                                                   detail=detail)
                raise AXApiInvalidParam('Cannot connect to git server')
            else:
                return {url: url}
        elif scm_type == ScmVendors.CODECOMMIT:
            repos = {}
            region = 'us-east-1'
            default_url_format = 'https://git-codecommit.{}.amazonaws.com/v1/repos/{}'
            client = boto3.client('codecommit', aws_access_key_id=username, aws_secret_access_key=password,
                                  region_name=region)
            try:
                response = client.list_repositories().get('repositories', [])
                for r in response:
                    repo_url = default_url_format.format(region, r['repositoryName'])
                    repos[repo_url] = repo_url
            except Exception as exc:
                detail = {
                    'type': scm_type,
                    'region': region,
                    'url': default_url_format.format(region, ''),
                    'username': username,
                    'error': 'Cannot connect to CodeCommit' + str(exc)
                }
                self.event_notification_client.send_message_to_notification_center(CODE_CONFIGURATION_SCM_CONNECTION_ERROR,
                                                                                   detail=detail)
                raise AXApiInvalidParam('Cannot connect to CodeCommit: %s' % exc)
            else:
                return repos
        else:
            return {}

    @staticmethod
    def parse_repo(repo):
        """Parse repo url into 4-tuple (protocol, vendor, repo_owner, repo_name).

        For example ``https://github.com/owner/name.git`` yields
        ``('https', 'github.com', 'owner', 'name.git')``.

        :param repo: repository URL.
        :return: tuple ``(protocol, vendor, repo_owner, repo_name)``.
        :raises AXScmException: when the URL path is not ``/<owner>/<name>``.
        """
        parsed_url = urlparse(repo)
        protocol, vendor = parsed_url.scheme, parsed_url.hostname
        m = re.match(r'/([a-zA-Z0-9\-]+)/([a-zA-Z0-9_.\-/]+)', parsed_url.path)
        if not m:
            raise AXScmException('Illegal repo URL', detail='Illegal repo URL ({})'.format(repo))
        repo_owner, repo_name = m.groups()
        return protocol, vendor, repo_owner, repo_name

    def has_webhook(self, repo):
        """Test if any repo other than ``repo`` uses a webhook.

        :param repo: repository URL to leave out of the check.
        :return: True when some SCM tool has ``use_webhook`` set and lists at
            least one repository besides ``repo``; False otherwise.
        """
        tools = self.axops_client.get_tools(category='scm')
        for tool in tools:
            use_webhook = tool.get('use_webhook', False)
            other_repos = set(tool.get('repos', [])) - {repo}
            if use_webhook and other_repos:
                return True
        return False

    def get_webhook(self, vendor, repo):
        """Get webhook

        :param vendor: key into ``self.scm_clients``.
        :param repo: repository URL.
        :returns: whatever the vendor client's ``get_webhook`` returns.
        """
        logger.info('Retrieving webhook (repo: %s) ...', repo)
        return self.scm_clients[vendor].get_webhook(repo)

    def create_webhook(self, vendor, repo):
        """Create webhook

        Creates the ELB for the webhook, waits until its host name resolves,
        then creates the webhook on the SCM vendor.

        :param vendor: key into ``self.scm_clients``.
        :param repo: repository URL.
        :returns: an empty dict on success.
        :raises AXApiInternalError: ELB creation/verification failed, or the
            webhook could not be configured.
        :raises AXApiInvalidParam: the vendor rejected the credentials or
            they lack permission.
        """

        # Retried every 5 seconds for up to 20 minutes.
        @retry(wait_fixed=5000, stop_max_delay=20 * 60 * 1000)
        def _verify_elb(hostname):
            try:
                logger.info('Verifying ELB (%s) ...', hostname)
                ip = socket.gethostbyname(hostname)
                logger.info('Successfully resolved ELB (%s) to IP (%s)', hostname, ip)
            except Exception as e:
                logger.error('ELB not ready: %s', str(e))
                raise AXApiInternalError('ELB not ready', str(e))

        ip_range = self.scm_clients[vendor].get_webhook_whitelist()

        # Create ELB
        payload = {'ip_range': ip_range, 'external_port': 8443, 'internal_port': 8087}
        try:
            logger.info('Creating ELB for webhook ...')
            result = self.axsys_client.create_webhook(**payload)
        except Exception as e:
            logger.error('Failed to create ELB for webhook: %s', str(e))
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_ELB_CREATION_FAILURE,
                                                                               detail=payload)
            raise AXApiInternalError('Failed to create ELB for webhook', str(e))
        else:
            logger.info('Successfully created ELB for webhook')

        # Verify ELB
        hostname = result['hostname']
        try:
            _verify_elb(hostname)
        except Exception as e:
            logger.error('Timed out on waiting for ELB to be available: %s', str(e))
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_ELB_VERIFICATION_TIMEOUT,
                                                                               detail={'hostname': hostname})
            raise AXApiInternalError('Timed out on waiting for ELB to be available: %s' % str(e))

        # Create webhook
        try:
            logger.info('Creating webhook (repo: %s) ...', repo)
            self.scm_clients[vendor].create_webhook(repo)
        except AXApiAuthFailed as e:
            logger.error('Invalid credential supplied')
            detail = {
                'repo': repo,
                'error': 'Invalid credential supplied:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_WEBHOOK_CREATION_FAILURE,
                                                                               detail=detail)
            raise AXApiInvalidParam('User authentication failed', detail=str(e))
        except AXApiForbiddenReq as e:
            logger.error('Supplied credential is valid but having insufficient permission')
            detail = {
                'repo': repo,
                'error': 'Supplied credential is valid but having insufficient permission:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_WEBHOOK_CREATION_FAILURE,
                                                                               detail=detail)
            raise AXApiInvalidParam('User has insufficient permission', detail=str(e))
        except Exception as e:
            logger.error('Failed to configure webhook: %s', e)
            detail = {
                'repo': repo,
                'error': 'Failed to configure webhook:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_WEBHOOK_CREATION_FAILURE,
                                                                               detail=detail)
            raise AXApiInternalError('Failed to configure webhook', str(e))
        else:
            logger.info('Successfully created webhook (repo: %s)', repo)
            return {}

    def delete_webhook(self, vendor, repo):
        """Delete webhook

        Deletes the webhook on the SCM vendor and, when no other repository
        uses a webhook, the ELB as well.

        :param vendor: key into ``self.scm_clients``.
        :param repo: repository URL.
        :returns: an empty dict on success.
        :raises AXApiInvalidParam: the vendor rejected the credentials or
            they lack permission.
        :raises AXApiInternalError: the webhook or the ELB could not be
            deleted.
        """
        # Delete webhook
        try:
            logger.info('Deleting webhook (repo: %s) ...', repo)
            self.scm_clients[vendor].delete_webhook(repo)
        except AXApiAuthFailed as e:
            logger.error('Invalid credential supplied')
            detail = {
                'repo': repo,
                'error': 'Invalid credential supplied:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_WEBHOOK_DELETION_FAILURE,
                                                                               detail=detail)
            raise AXApiInvalidParam('User authentication failed', detail=str(e))
        except AXApiForbiddenReq as e:
            logger.error('Supplied credential is valid but having insufficient permission')
            detail = {
                'repo': repo,
                'error': 'Supplied credential is valid but having insufficient permission:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_WEBHOOK_DELETION_FAILURE,
                                                                               detail=detail)
            raise AXApiInvalidParam('User has insufficient permission', detail=str(e))
        except Exception as e:
            logger.error('Failed to delete webhook: %s', e)
            detail = {
                'repo': repo,
                'error': 'Failed to delete webhook:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_WEBHOOK_DELETION_FAILURE,
                                                                               detail=detail)
            raise AXApiInternalError('Failed to delete webhook', str(e))
        else:
            logger.info('Successfully deleted webhook (repo: %s)', repo)

        # Delete ELB
        try:
            if not self.has_webhook(repo):
                logger.info('Deleting ELB for webhook ...')
                self.axsys_client.delete_webhook()
        except Exception as e:
            logger.error('Failed to delete ELB for webhook: %s', str(e))
            detail = {'repo': repo,
                      'error': 'Failed to delete ELB for webhook' + str(e)
                      }
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_ELB_DELETION_FAILURE,
                                                                               detail=detail)
            raise AXApiInternalError('Failed to delete ELB for webhook', str(e))
        else:
            logger.info('Successfully deleted ELB for webhook')
            return {}

    def purge_branches(self, repo, branch=None):
        """Purge branch heads.

        :param repo: repository URL (required).
        :param branch: when given, only this branch head is purged;
            otherwise all branch heads of the repo are purged.
        :return: None.
        :raises AXApiInvalidParam: ``repo`` is missing.
        :raises AXApiInternalError: the purge failed in AXDB.
        """
        if not repo:
            raise AXApiInvalidParam('Missing required parameter', 'Missing required parameter (repo)')
        logger.info('Purging branch heads (repo: %s, branch: %s) ...', repo, branch)

        try:
            if not branch:
                self.axdb_client.purge_branch_heads(repo)
            else:
                self.axdb_client.purge_branch_head(repo, branch)
        except Exception as e:
            message = 'Unable to purge branch heads'
            detail = 'Unable to purge branch heads (repo: {}, branch: {}): {}'.format(repo, branch, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        else:
            logger.info('Successfully purged branch heads')

    def get_branches(self, repo=None, branch=None, order_by=None, limit=None):
        """Get branches.

        :param repo: URL-quoted repository URL; when omitted every workspace
            found under ``BASE_DIR`` is scanned.
        :param branch: name filter; ``*`` is a wildcard and the filter is
            matched as a substring pattern from the start of the name.
        :param order_by: ``commit_date``, ``-commit_date`` or ``-native``;
            anything else sorts by ``(repo, name)`` ascending.
        :param limit: maximum number of branches to return.
        :return: list of dicts with the keys ``repo``, ``name``, ``revision``
            and ``commit_date``.
        """

        def _get_branches(workspace):
            """Retrieve list of remote branches in the workspace.

            The result is cached in redis for ``BRANCH_CACHE_TTL`` seconds.

            :param workspace: path of the local workspace.
            :return: a list of dictionaries; empty when the scan fails.
            """
            try:
                key = '{}:{}'.format(Gateway.NAMESPACE, workspace)
                if self.redis_client.exists(key):
                    logger.info('Loading cache (workspace: %s) ...', workspace)
                    results = self.redis_client.get(key, decoder=json.loads)
                    return results
                else:
                    logger.info('Scanning workspace (%s) ...', workspace)
                    git_client = GitClient(path=workspace, read_only=True)
                    remote = git_client.get_remote()
                    heads = git_client.get_remote_heads()
                    results = [
                        {
                            'repo': remote,
                            'name': head['reference'],
                            'revision': head['commit'],
                            'commit_date': head['commit_date']
                        }
                        for head in heads
                    ]
                    logger.info('Saving cache (workspace: %s) ...', workspace)
                    self.redis_client.set(key, results, expire=Gateway.BRANCH_CACHE_TTL, encoder=json.dumps)
                    return results
            except Exception as e:
                logger.warning('Failed to scan workspace (%s): %s', workspace, e)
                return []

        logger.info('Retrieving branches (repo: %s, branch: %s) ...', repo, branch)
        if repo:
            repo = unquote(repo)
            _, vendor, repo_owner, repo_name = self.parse_repo(repo)
            workspaces = ['{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)]
        else:
            # A workspace is the parent of every ".git" directory; [:-5]
            # strips the trailing "/.git".
            workspaces = [root[:-5] for root, _, _ in os.walk(Gateway.BASE_DIR) if root.endswith('/.git')]

        branches = []
        with ThreadPoolExecutor(max_workers=20) as executor:
            futures = [executor.submit(_get_branches, workspace) for workspace in workspaces]
            for future in as_completed(futures):
                try:
                    data = future.result()
                except Exception as e:
                    logger.warning('Unexpected exception occurred during processing: %s', e)
                else:
                    branches.extend(data)

        if branch:
            # NOTE(review): only '*' is translated; other regex
            # metacharacters in ``branch`` keep their regex meaning.
            pattern = '.*{}.*'.format(branch.replace('*', '.*'))
            branches = [b for b in branches if re.match(pattern, b['name'])]

        if order_by == 'commit_date':
            branches = sorted(branches, key=lambda v: v['commit_date'])
        elif order_by == '-commit_date':
            branches = sorted(branches, key=lambda v: v['commit_date'], reverse=True)
        elif order_by == '-native':
            branches = sorted(branches, key=lambda v: (v['repo'], v['name']), reverse=True)
        else:
            branches = sorted(branches, key=lambda v: (v['repo'], v['name']))

        if limit:
            branches = branches[:limit]
        logger.info('Successfully retrieved %s branches', len(branches))
        return branches

    @staticmethod
    def _get_commits(workspace, branch=None, since=None, until=None, commit=None, author=None, committer=None,
                     description=None, limit=None):
        """Search for commits in a workspace.

        A leading ``~`` on ``commit``, ``author``, ``committer`` or
        ``description`` is stripped before the search.

        :return: the result of ``GitClient.get_commits``, or None when the
            workspace cannot be scanned (the error is only logged).
        """
        try:
            logger.info('Scanning workspace (%s) for commits ...', workspace)
            git_client = GitClient(path=workspace, read_only=True)
            if commit and commit.startswith('~'):
                commit = commit[1:]
            if author and author.startswith('~'):
                author = author[1:]
            if committer and committer.startswith('~'):
                committer = committer[1:]
            if description and description.startswith('~'):
                description = description[1:]
            return git_client.get_commits(branch=branch, commit=commit, since=since, until=until, author=author,
                                          committer=committer, description=description, limit=limit)
        except Exception as e:
            logger.warning('Failed to scan workspace (%s): %s', workspace, e)

    @staticmethod
    def _get_commit(workspace, commit):
        """Get a commit from a workspace.

        :return: the result of ``GitClient.get_commit``, or None when the
            workspace cannot be scanned (the error is only logged).
        """
        try:
            logger.info('Scanning workspace (%s) for commit (%s) ...', workspace, commit)
            git_client = GitClient(path=workspace, read_only=True)
            return git_client.get_commit(commit)
        except Exception as e:
            logger.warning('Failed to scan workspace (%s): %s', workspace, e)

    @staticmethod
    def _parse_repo_branch(repo, branch, repo_branch):
        """Parse repo / branch / repo_branch.

        :param repo: URL-quoted repository URL; takes precedence.
        :param branch: optional branch name, used with ``repo`` or when
            neither ``repo`` nor ``repo_branch`` is given.
        :param repo_branch: JSON object mapping a URL-quoted repository URL
            to a list of branch names.
        :return: dict mapping workspace path to its branches. The value is a
            list (possibly empty) except in the ``repo_branch`` case, where
            it is a set.
        :raises AXApiInvalidParam: ``repo`` or ``repo_branch`` cannot be
            parsed.
        """
        if repo:
            try:
                repo = unquote(repo)
                _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo)
            except Exception as e:
                # Was "'...: %s', e", which built a tuple instead of a string.
                msg = 'Unable to parse repo: %s' % str(e)
                logger.error(msg)
                raise AXApiInvalidParam('Unable to parse repo', msg)
            else:
                workspace = '{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)
                workspaces = {workspace: [branch] if branch else []}
        elif repo_branch:
            try:
                repo_branch = json.loads(repo_branch)
                workspaces = {}
                # Iterate items so the branch list is read with the original
                # (still quoted) key; looking it up again with the unquoted
                # URL raised KeyError for percent-encoded keys.
                for quoted_repo, repo_branches in repo_branch.items():
                    repo_url = unquote(quoted_repo)
                    _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo_url)
                    workspace = '{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)
                    workspaces.setdefault(workspace, set()).update(repo_branches)
            except Exception as e:
                msg = 'Unable to parse repo_branch: %s' % str(e)
                logger.error(msg)
                raise AXApiInvalidParam('Unable to parse repo_branch', msg)
        else:
            # Every workspace under BASE_DIR; [:-5] strips the "/.git".
            paths = [root[:-5] for root, _, _ in os.walk(Gateway.BASE_DIR) if root.endswith('/.git')]
            workspaces = {path: [branch] if branch else [] for path in paths}
        return workspaces

    @staticmethod
    def _put_file(repo, branch, path):
        """Put a file in s3.

        :param repo: repository URL.
        :param branch: branch to read the file from.
        :param path: path of the file inside the repository.
        :return: dict with the keys ``bucket``, ``key`` and ``etag``.
        :raises AXApiInvalidParam: no workspace exists for ``repo`` or the
            file cannot be found.
        :raises AXApiInternalError: reading the file or uploading it failed.
        """
        _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo)
        workspace = '{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)
        if not os.path.isdir(workspace):
            raise AXApiInvalidParam('Invalid repository', 'Invalid repository ({})'.format(repo))

        try:
            logger.info('Extracting file content from repository (repo: %s, branch: %s, path: %s) ...',
                        repo, branch, path)
            git_client = GitClient(path=workspace, read_only=True)
            files = git_client.get_files(branch=branch, subdir=path, binary_mode=True)
        except Exception as e:
            message = 'Failed to extract file content'
            detail = '{}: {}'.format(message, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        else:
            if not files:
                raise AXApiInvalidParam('Unable to locate file with given information')
            file_content = files[0]['content']
            logger.info('Successfully extracted file content')

        try:
            # Cluster name id always has the form <cluster_name>-<36_bytes_long_cluster_id>
            cluster_name, cluster_id = Gateway.CLUSTER_NAME_ID[:-37], Gateway.CLUSTER_NAME_ID[-36:]
            key = '{cluster_name}/{cluster_id}/{vendor}/{repo_owner}/{repo_name}/{branch}/{path}'.format(
                cluster_name=cluster_name, cluster_id=cluster_id, vendor=vendor,
                repo_owner=repo_owner, repo_name=repo_name, branch=branch, path=path)
            logger.info('Uploading file content to s3 (bucket: %s, key: %s) ...', Gateway.S3_BUCKET_NAME, key)
            response = Gateway.s3_bucket.Object(key).put(Body=file_content)
            etag = response.get('ETag')
            if etag:
                # Decode the ETag as a JSON string (drops surrounding quotes).
                etag = json.loads(etag)
        except Exception as e:
            message = 'Failed to upload file content'
            detail = '{}: {}'.format(message, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        else:
            logger.info('Successfully uploaded file content')
            return {'bucket': Gateway.S3_BUCKET_NAME, 'key': key, 'etag': etag}

    @staticmethod
    def _delete_file(repo, branch, path):
        """Delete a file from s3.

        :param repo: repository URL.
        :param branch: branch the file was uploaded for.
        :param path: path of the file inside the repository.
        :return: dict with the keys ``bucket`` and ``key``.
        :raises AXApiInternalError: the deletion failed.
        """
        _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo)
        try:
            # Same key layout as _put_file.
            cluster_name, cluster_id = Gateway.CLUSTER_NAME_ID[:-37], Gateway.CLUSTER_NAME_ID[-36:]
            key = '{cluster_name}/{cluster_id}/{vendor}/{repo_owner}/{repo_name}/{branch}/{path}'.format(
                cluster_name=cluster_name, cluster_id=cluster_id, vendor=vendor,
                repo_owner=repo_owner, repo_name=repo_name, branch=branch, path=path)
            logger.info('Deleting file from s3 (bucket: %s, key: %s) ...', Gateway.S3_BUCKET_NAME, key)
            Gateway.s3_bucket.Object(key).delete()
        except Exception as e:
            message = 'Failed to delete file'
            detail = '{}: {}'.format(message, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        else:
            logger.info('Successfully deleted file')
            return {'bucket': Gateway.S3_BUCKET_NAME, 'key': key}

    @staticmethod
    def init_jira_client(axops_client, url=None, username=None, password=None):
        """Initialize an Jira client

        When any of ``url``, ``username`` or ``password`` is missing, all
        three are taken from the first Jira tool configured in AXOPS.

        :param axops_client: client used to look up the Jira configuration.
        :return: a ``JiraClient``.
        :raises AXApiInvalidParam: no Jira tool is configured.
        """

        def get_jira_configuration():
            js = axops_client.get_tools(category='issue_management', type='jira')
            if js:
                return {'url': js[0]['url'],
                        'username': js[0]['username'],
                        'password': js[0]['password']
                        }
            else:
                return dict()

        if url is None or username is None or password is None:
            conf = get_jira_configuration()
            if not conf:
                raise AXApiInvalidParam('No JIRA configured')
            else:
                url, username, password = conf['url'], conf['username'], conf['password']
        return JiraClient(url, username, password)

    # Verify whether this function is still needed
    def check_github_whitelist(self):
        """Re-create the webhook ELB when GitHub's advertised IP whitelist
        differs from the cached one. Does nothing when no GitHub webhook is
        configured; an ELB failure is logged and notified, not raised.
        """
        if not self.is_github_webhook_enabled():
            logger.info('No GitHub webhook configured')
            return
        configured = self.get_from_cache()
        logger.info('The configured GitHub webhook whitelist is %s', configured)
        advertised = self.scm_clients[ScmVendors.GITHUB].get_webhook_whitelist()
        logger.info('The GitHub webhook whitelist is %s', advertised)
        if set(configured) == set(advertised):
            logger.info('No update needed')
        else:
            # Create ELB
            payload = {'ip_range': advertised, 'external_port': 8443, 'internal_port': 8087}
            try:
                logger.info('Creating ELB for webhook ...')
                self.axsys_client.create_webhook(**payload)
            except Exception as exc:
                logger.error('Failed to create ELB for webhook: %s', str(exc))
                self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_ELB_CREATION_FAILURE,
                                                                                   detail=payload)
            else:
                # Update cache
                self.write_to_cache(advertised)
                logger.info('Successfully updated ELB for webhook')

    def is_github_webhook_enabled(self):
        """ Check whether the webhook is configured or not

        :return: True when at least one GitHub tool has a truthy
            ``use_webhook``. A tool without that key counts as disabled,
            matching ``has_webhook``.
        """
        github_data = self.axops_client.get_tools(type='github')
        return any(tool.get('use_webhook') for tool in github_data)

    @staticmethod
    def write_to_cache(ip_range):
        """ Store the webhook whitelist info

        :param ip_range: JSON-serialisable whitelist, written as one JSON
            document to the cache file.
        """
        cache_file = '/tmp/github_webhook_whitelist'
        with open(cache_file, 'w+') as f:
            f.write(json.dumps(ip_range))

    def get_from_cache(self):
        """ Get cached webhook whitelist info, otherwise get from axmon

        :return: the whitelist as a list; empty when there is no cache file
            and the lookup through AXSYS fails.
        """
        cache_file = '/tmp/github_webhook_whitelist'
        ip_range = list()
        if os.path.exists(cache_file):
            with open(cache_file, 'r+') as f:
                data = f.readlines()
                # The cache holds a single JSON document on its first line.
                ip_range = json.loads(data[0])
        else:
            logger.debug('No cache file')
            try:
                data = self.axsys_client.get_webhook()
            except Exception as exc:
                logger.warning(exc)
            else:
                logger.info('Write whitelist info to cache file')
                ip_range = data['ip_ranges']
                self.write_to_cache(ip_range)
        return ip_range
from ax.devops.ci.constants import ScmVendors from ax.devops.kafka.kafka_client import ProducerClient from ax.devops.redis.redis_client import RedisClient from ax.devops.scm.scm import GitClient, CodeCommitClient from ax.devops.settings import AxSettings from ax.version import __version__ logger = logging.getLogger(__name__) BASE_DIR = '/ax/data/repos' DEFAULT_CONCURRENCY = 20 DEFAULT_INTERVAL = 30 NAMESPACE = 'gateway' TEMPLATE_DIR = '.argo' redis_client = RedisClient('redis', db=10) class RepoManager(object): """Manage all repositories in track.""" def __init__(self, concurrency, interval): self.axdb_client = AxdbClient() self.axops_client = AxopsClient() self.concurrency = concurrency self.interval = interval def run(self): """Create workspaces and perform initial/incremental fetch.""" while True: logger.info('Start repository scan ...') try:
import json
import jwt

from ax.version import __version__
from ax.devops.axdb.axdb_client import AxdbClient
from ax.devops.axsys.axsys_client import AxsysClient
from ax.devops.axdb.axops_client import AxopsClient
from ax.devops.redis.redis_client import RedisClient, DB_RESULT

logger = logging.getLogger(__name__)

# Module-level service clients, created as a side effect of importing this
# module.
axdb_client = AxdbClient()
axsys_client = AxsysClient()
axops_client = AxopsClient()
# Redis client on the result database, created with up to 10 retry attempts
# and a fixed wait of 5000 between them.
# NOTE(review): the unit is presumably milliseconds; confirm in RedisClient.
redis_client = RedisClient(host='redis.axsys', db=DB_RESULT, retry_max_attempt=10, retry_wait_fixed=5000)


class AXApprovalException(RuntimeError):
    # Approval-specific error type; adds nothing to RuntimeError.
    pass


class AXApproval(object):
    # State and response string constants.
    # NOTE(review): presumably compared against stored approval state and
    # user responses; confirm against the code that reads them.
    FAILURE_STATE = "FAILURE"
    WAITING_STATE = "WAITING"
    APPROVE_STRING = "APPROVE"
    DECLINE_STRING = "DECLINE"

    def __init__(self, required_list, optional_list, number_optional, timeout):
        # NOTE(review): the parameters are not used in the visible part of
        # this constructor; only task_id is initialised here.
        self.task_id = None
class FixtureRequestProcessor(object):
    """Records fixture requests, assigns fixture instances/volumes to them and notifies requesters.

    Requests are stored in a FixtureRequestDatabase. Assignments are pushed to a
    per-request redis list (the request's notification channel). A background
    thread, started with start_processor(), re-processes the request table on a
    fixed interval or whenever trigger_processor() is called.
    """

    def __init__(self, fixturemgr, redis_host=None, redis_db=None):
        """
        :param fixturemgr: fixture manager that owns fixture instances, classes and the volume manager
        :param redis_host: redis host, optionally in 'host:port' form
        :param redis_db: NOTE(review): accepted but never used; the notification client
            always connects to DB_RESULT -- confirm whether this is intended
        """
        self.fixmgr = fixturemgr
        self.requestdb = FixtureRequestDatabase(self.fixmgr)
        redis_host, port = self._split_host_port(redis_host)
        self.redis_client_notification = RedisClient(host=redis_host,
                                                     port=port,
                                                     db=DB_RESULT)
        self.process_interval = common.DEFAULT_PROCESS_INTERVAL
        self._stop = False
        # Condition variable to notify the request processor that it should
        # wake up and process the requests
        self._process_cv = threading.Condition()
        # Number of trigger_processor() calls since the processor last ran
        self._events = 0
        self._request_processor_thread = None
        # Serializes request processing (background and synchronous) and
        # guards the start/stop bookkeeping of the processor thread
        self._processor_lock = threading.Lock()

    @staticmethod
    def _split_host_port(redis_host):
        """Split an optional 'host:port' string into (host, port).

        The split is done on the last colon only, so a host which itself
        contains colons no longer breaks the two-value unpacking.

        :param redis_host: host string, 'host:port' string, or None
        :returns: tuple of (host, port); port is a string, or None if not supplied
        """
        if redis_host and ':' in redis_host:
            host, port = redis_host.rsplit(':', 1)
            return host, port
        return redis_host, None

    @property
    def axdb_client(self):
        """The axdb client of the fixture manager"""
        return self.fixmgr.axdb_client

    def create_fixture_request_mock(self, request):
        """Create a fixture request mock

        No real fixture is reserved. A daemon thread fabricates an assignment
        for each requirement, sleeps for a random 0-20 seconds and then pushes
        the request to its notification channel.

        :param request: fixture request
        :return: created fixture request
        """

        def _notify_reservation_available_mock(req):
            try:
                logger.info("get mock request for %s. existing assignment=%s",
                            req.service_id, req.assignment)
                for req_name in req.requirements:
                    req.assignment[req_name] = {
                        'attributes': {
                            'name': req_name
                        },
                        'name': req_name
                    }
                sleep_second = random.randint(0, 20)
                logger.info("sleep %s seconds for mock request %s",
                            sleep_second, req.service_id)
                time.sleep(sleep_second)
                logger.info("notify mock request %s", req.service_id)
                self._notify_channel(req)
            except Exception:
                logger.exception("mock exception")

        fixture_request = FixtureRequest(request)
        fixture_request.assignment = {}
        t = threading.Thread(
            name="mock-reply-thread-{}".format(fixture_request.service_id),
            target=_notify_reservation_available_mock,
            kwargs={'req': fixture_request})
        t.daemon = True
        t.start()
        return fixture_request

    def create_fixture_request(self, request):
        """Create a fixture request. For volumes, if an anonymous volume is requested, this will create the volume
        as well as reserve it.

        :param request: fixture request. An optional 'synchronous' key is popped from it; when truthy the
            request is assigned immediately instead of being left for the background processor
        :return: created fixture request
        :raises AXApiResourceNotFound: if a synchronous request could not be assigned
        """
        synchronous = request.pop('synchronous', False)
        fix_req = FixtureRequest(request)
        self._validate_fixture_request(fix_req)

        if synchronous:
            # If in synchronous mode, we need to hold the processor lock while we create the fixture request
            # and immediately process it. Otherwise, the background request processor may jump in and assign
            # it from underneath us.
            self._processor_lock.acquire()
        try:
            fix_req = self.requestdb.add(fix_req)
            if fix_req.assigned:
                logger.warning(
                    "Client made duplicate request which was already assigned. Returning existing request:\n%s",
                    pretty_json(fix_req.json()))
            elif synchronous:
                # If fixture request was sent in synchronous mode, we attempt to assign the fixture
                # instance(s) immediately
                assigned_req = self._process_request(fix_req)
                if not assigned_req:
                    self.requestdb.remove(fix_req.service_id)
                    raise AXApiResourceNotFound(
                        "Could not formulate resources for fixture request")
                fix_req = assigned_req
            else:
                # async request. we created the entry in the request database. now just notify the processor
                self.trigger_processor()
        finally:
            if synchronous:
                self._processor_lock.release()

        if synchronous and fix_req.vol_assignment:
            # This may raise AXTimeoutException, leaving the created & assigned fixture request.
            # The volume workers will continue to bring any volumes to active state.
            # It will be the responsibility of the caller to decide if he should reissue the request
            # and wait longer for the volumes to become active, or give up and delete the request.
            self.fixmgr.volumemgr.wait_volumes_active(fix_req)

        if self.should_notify(fix_req):
            self._notify_channel(fix_req)
        return fix_req

    def _validate_fixture_request(self, fix_req):
        """Validates a fixture request by checking the current inventory of fixtures and volumes to ensure
        we can satisfy the request.

        :raises AXApiInvalidParam if request was invalid, or AXApiResourceNotFound if request was valid
            but could not be satisfied"""
        # Validate the request (ensure attributes are valid)
        for requirement in fix_req.requirements.values():
            if 'class' not in requirement:
                continue
            try:
                fix_class = self.fixmgr.get_fixture_class(
                    name=requirement['class'])
            except AXApiResourceNotFound as err:
                # An unknown class is a client error, not a missing resource
                raise AXApiInvalidParam(err.args[0]) from err
            for attr_name in requirement.get('attributes', {}).keys():
                if attr_name not in fix_class.attributes:
                    raise AXApiInvalidParam(
                        "Fixture class {} does not have attribute {}".format(
                            requirement['class'], attr_name))

        if fix_req.requirements:
            # See if assignment is even possible given current inventory of fixtures.
            # If we cannot satisfy the request, we will reject the request, since it will never be assigned
            # (unless fixtures are added)
            self._find_candidates(fix_req.requirements, validate_request=True)

        # Do the same for volumes
        if fix_req.vol_requirements:
            self.fixmgr.volumemgr.find_named_volume_candidates(
                fix_req, validate_request=True)
            for vol_requirement in fix_req.vol_requirements.values():
                if not vol_requirement.get('axrn'):
                    # anonymous volume request. verify storage_class exists
                    storage_class_name = vol_requirement.get('storage_class')
                    if not storage_class_name:
                        raise AXApiInvalidParam(
                            "Volume request did not supply axrn or storage class"
                        )
                    if not self.fixmgr.volumemgr.get_storage_class_by_name(
                            storage_class_name):
                        raise AXApiInvalidParam(
                            "Storage class '{}' does not exist".format(
                                storage_class_name))

    def reserve_instances(self, fix_req, instance_ids):
        """
        Updates the database and appends the referrer to instance's referrers.
        If any instance cannot be reserved, reservations made so far by this call
        are released (best effort) and the error is re-raised.

        :param fix_req: FixtureRequest object of the requestor
        :param instance_ids: list of instance ids of which to append the referrer to
        :raises AXException: if an instance is no longer reservable
        """
        logger.info("%s reserving fixture : %s", fix_req, instance_ids)
        reserved_ids = []
        with ExitStack() as stack:
            # Acquire lock on all instances. Necessary for atomic assignment of multiple instances
            for instance_id in instance_ids:
                stack.enter_context(lock_instance(instance_id))

            # We have a lock on all instances
            try:
                for instance_id in instance_ids:
                    instance = self.fixmgr.get_fixture_instance(id=instance_id)
                    if not instance.is_reservable(
                            service_id=fix_req.service_id):
                        errmsg = "{} is no longer reservable".format(instance)
                        logger.error("%s\n%s", errmsg, pretty_json(instance))
                        raise AXException(errmsg)
                    modified = instance.add_referrer(fix_req.referrer())
                    if modified:
                        self.fixmgr._persist_instance_updates(instance)
                        reserved_ids.append(instance.id)
                        logger.info(
                            "Successfully reserved %s with referrer: %s",
                            instance, fix_req)
                    else:
                        logger.warning("%s already had reservation on: %s",
                                       fix_req, instance)
            except Exception:
                logger.exception(
                    "Failed to reserve instances. Undoing reservations: %s",
                    reserved_ids)
                try:
                    # Locks are still held here, so use the lock-free helper
                    self._release_instances(fix_req, reserved_ids)
                except Exception:
                    logger.warning("Failed to release partial reservation")
                raise

    def release_instances(self, fix_req, instance_ids):
        """Releases instances. Removes the referrer from the instance

        :param fix_req: FixtureRequest object of the requestor
        :param instance_ids: list of instance ids of which to remove the referrer from
        """
        logger.info("%s releasing instances: %s", fix_req, instance_ids)
        if not instance_ids:
            return
        with ExitStack() as stack:
            # Acquire lock on all instances. Necessary for atomic release of multiple instances
            for instance_id in instance_ids:
                stack.enter_context(lock_instance(instance_id))
            self._release_instances(fix_req, instance_ids)

    def _release_instances(self, fix_req, instance_ids):
        """Internal helper to release instances. Lock on instance is assumed"""
        for instance_id in instance_ids:
            instance = self.fixmgr.get_fixture_instance(instance_id,
                                                        verify_exists=False)
            if not instance:
                logger.warning("Instance %s no longer exists", instance_id)
                continue
            instance.remove_referrer(fix_req.service_id)
            self.fixmgr._persist_instance_updates(instance)
            logger.debug("%s release of %s successful", fix_req, instance)

    def delete_fixture_request(self, service_id):
        """Releases all fixtures requested, reserved, or deployed by the specified service id.

        :param service_id: service id of the requestor
        :returns: the service id, or None if no request existed for it
        """
        fix_req = self.requestdb.get(service_id, verify_exists=False)
        if not fix_req:
            logger.info(
                "No fixture request found for service id %s. Ignoring deletion",
                service_id)
            return

        self.requestdb.remove(service_id)

        assigned_fixture_ids = [f['id'] for f in fix_req.assignment.values()]
        if assigned_fixture_ids:
            self.release_instances(fix_req, assigned_fixture_ids)

        assigned_volume_ids = [
            v['id'] for v in fix_req.vol_assignment.values()
        ]
        if assigned_volume_ids:
            self.fixmgr.volumemgr.release_volumes(fix_req, assigned_volume_ids)

        self.redis_client_notification.delete(fix_req.notification_channel)

        if assigned_fixture_ids or assigned_volume_ids:
            # Something was freed; give pending requests a chance to be assigned
            self.trigger_processor()

        logger.info("Deleted fixture request for service id %s", service_id)
        return service_id

    def get_fixture_request(self, service_id, verify_exists=True):
        """Return fixture request for a service id"""
        return self.requestdb.get(service_id, verify_exists=verify_exists)

    def get_fixture_requests(self, assigned=None):
        """Return a list of fixture requests"""
        return self.requestdb.items(assigned=assigned)

    def _find_candidates(self, requirements, validate_request=False):
        """For each fixture requirement, queries the fixture database for list of matching entities

        :param requirements: mapping of requirement name to requirement
        :param validate_request: if True, includes disabled instances in the query and raises a
            AXApiResourceNotFound error if no fixtures exists satisfying requirement
        :returns: mapping of requirement name to list of candidates, or None if no fixtures were found
            matching any of the requirements"""
        candidate_dict = {}
        for req_name, requirement in requirements.items():
            query = {}
            if validate_request:
                query['deleted'] = False
            else:
                query['available'] = True
            for base_attr, base_val in requirement.items():
                if base_attr == 'attributes':
                    # handled below, as 'attributes.<name>' keys
                    continue
                elif base_attr == 'class':
                    cat = self.fixmgr.get_fixture_class(name=base_val)
                    query['class_id'] = cat.id
                else:
                    query[base_attr] = base_val
            for attr_name, attr_val in requirement.get('attributes',
                                                       {}).items():
                query['attributes.{}'.format(attr_name)] = attr_val
            candidates = list(self.fixmgr.query_fixture_instances(query))
            if not candidates:
                if validate_request:
                    raise AXApiResourceNotFound(
                        "Impossible request: no instances exist satisfying requirement: {}"
                        .format(requirement))
                else:
                    logger.debug("Failed to find fixture satisfying: %s",
                                 requirement)
                    return None
            candidate_dict[req_name] = candidates
        return candidate_dict

    def _assign_candidates(self, candidate_map):
        """Finds a combination of assignments that will satisfy the mapping of requirements to candidates

        :param candidate_map: mapping of requirement_name to list of fixture candidates json
        :returns: a mapping of the requirement name to the assignment, or None if no combination exists
        """
        logger.debug("Formulating assignments for: %s",
                     list(candidate_map.keys()))
        # Convert the candidate map to a sorted list of tuples of (requirement_name, fixture_id_set).
        # The list is sorted by most restrictive requirement to least restrictive requirement, in order to have
        # a faster assignment algorithm.
        candidate_list = sorted(candidate_map.items(),
                                key=lambda x: len(x[1]))
        candidate_list = [(req_name, {fix.id for fix in fix_list})
                          for (req_name, fix_list) in candidate_list]
        fix_id_assignment = self._assign_candidates_helper(candidate_list)
        if fix_id_assignment:
            assignment = {}
            for req_name, fix_id in fix_id_assignment.items():
                assignment[req_name] = self.fixmgr.get_fixture_instance(
                    id=fix_id)
            return assignment
        else:
            logger.warning("Assignment is impossible: %s", candidate_list)
            return None

    def _assign_candidates_helper(self, candidate_list):
        """Internal helper to _assign_candidates to recursively find a working assignment combination

        :param candidate_list: non-empty list of (requirement_name, set of candidate ids) tuples
        :returns: mapping of requirement name to the chosen candidate id, or None if impossible
        """
        assignments = {}
        req_name, candidates = candidate_list[0][0], list(candidate_list[0][1])
        if len(candidate_list) == 1:
            # Last requirement: any remaining candidate works
            return {req_name: random.choice(candidates)}

        # Randomize so that the same instance is not always tried first
        random.shuffle(candidates)
        for candidate in candidates:
            logger.debug("Attempting assignment: '%s' -> '%s'", req_name,
                         candidate)
            assignments[req_name] = candidate
            # construct new candidate list which excludes current assignment from candidates
            sub_candidate_list = []
            possible_assignment = True
            for _req_name, _cands in candidate_list[1:]:
                new_candidate_set = _cands - {candidate}
                if not new_candidate_set:
                    logger.debug(
                        "Assignment of '%s' -> '%s' prevents assignment of '%s'",
                        req_name, candidate, _req_name)
                    possible_assignment = False
                    break
                sub_candidate_list.append((_req_name, new_candidate_set))
            if not possible_assignment:
                continue
            sub_assignments = self._assign_candidates_helper(
                sub_candidate_list)
            if sub_assignments:
                assignments.update(sub_assignments)
                return assignments
        return None

    def _flatten_assignment(self, assignment):
        """Return a flattened fixture request assignment to be pushed to the notification/assignment channel"""
        return {
            ref_name: instance.requestdoc()
            for ref_name, instance in assignment.items()
        }

    def should_notify(self, fix_req):
        """Tests whether if we should notify the requester about his fixture/volume assignment"""
        if not fix_req.assigned:
            return False
        if self.redis_client_notification.client.exists(
                fix_req.notification_channel):
            # if there is already a notification, no need to re-notify
            return False
        if not self.fixmgr.volumemgr.check_set_volumes_active(fix_req):
            logger.info(
                "Not all volumes active yet for %s. Skipping channel notification",
                fix_req)
            return False
        return True

    def _notify_channel(self, fix_req):
        """Notify listener by pushing the assignment to the redis list"""
        fix_names = [f['name'] for f in fix_req.assignment.values()]
        vol_ids = [v['id'] for v in fix_req.vol_assignment.values()]
        logger.info("Notifying %s of assignment: %s, vol_assignment: %s",
                    fix_req.service_id, fix_names, vol_ids)
        self.redis_client_notification.rpush(fix_req.notification_channel,
                                             fix_req.json(),
                                             expire=3600 * 24 * 10,
                                             encoder=json.dumps)

    def process_requests(self):
        """Processes the list of all fixture requests: attempts to assign the unassigned ones, and
        notifies requesters of already assigned ones that have not been notified yet.

        :returns: number of requests assigned during this pass
        """
        with self._processor_lock:
            requests = self.get_fixture_requests()
            logger.info("Processing %s requests", len(requests))
            num_assigned = 0
            num_unassigned = 0
            for fix_req in requests:
                try:
                    if not fix_req.assigned:
                        num_unassigned += 1
                        logger.info("Processing request: %s", fix_req.json())
                        if self._process_request(fix_req):
                            num_assigned += 1
                    else:
                        if self.should_notify(fix_req):
                            self._notify_channel(fix_req)
                except Exception:
                    # One bad request must not prevent processing of the rest
                    logger.exception("Failed to process request: %s", fix_req)
            logger.info("Assigned %s/%s requests", num_assigned,
                        num_unassigned)
            return num_assigned

    def _process_request(self, fix_req):
        """Processes a single fixture request. Returns the request if it was successfully assigned, None otherwise

        :param fix_req: a FixtureRequest instance"""
        fixture_assignment = None
        fixture_reserve_ids = []
        volume_assignment = None

        if fix_req.requirements:
            # Fixtures are requested by attributes
            candidate_map = self._find_candidates(fix_req.requirements)
            if not candidate_map:
                return None
            fixture_assignment = self._assign_candidates(candidate_map)
            if not fixture_assignment:
                return None
            logger.debug("Preliminary instance assignment for %s:", fix_req)
            for ref_name, instance in fixture_assignment.items():
                logger.debug("%s:\n%s", ref_name, pretty_json(instance.json()))
                fixture_reserve_ids.append(instance.id)

        if fix_req.vol_requirements:
            # Find and assign volumes which are requested specifically by name
            volume_assignment = self.fixmgr.volumemgr.find_named_volume_candidates(
                fix_req)
            if volume_assignment is None:
                return None
            # Create and assign anonymous volume requests
            if not self.fixmgr.volumemgr.provision_anonymous_volumes(
                    fix_req, volume_assignment):
                return None

        # If we get here, it means we have successfully found available fixtures and/or volumes which satisfies
        # the fixture request. The following steps will update the databases with the assignments.
        try:
            if fixture_assignment:
                self.reserve_instances(fix_req, fixture_reserve_ids)
                fix_req.assignment = self._flatten_assignment(
                    fixture_assignment)
            if volume_assignment:
                self.fixmgr.volumemgr.reserve_volumes(
                    fix_req, volume_assignment.values())
                fix_req.vol_assignment = self.fixmgr.volumemgr.flatten_assignment(
                    volume_assignment)
            # Assignment time is recorded in microseconds since the epoch
            fix_req.assignment_time = int(time.time() * 1e6)
            self.requestdb.update(fix_req)
            if fixture_assignment:
                self.update_service_object(fix_req)
        except Exception:
            logger.exception("Failed to reserve fixtures")
            # If any problems, release the reservations
            self.release_instances(fix_req, fixture_reserve_ids)
            if volume_assignment:
                self.fixmgr.volumemgr.release_volumes(
                    fix_req, [vol.id for vol in volume_assignment.values()])
            raise

        if self.should_notify(fix_req):
            self._notify_channel(fix_req)
        return fix_req

    def _request_processor(self):
        """Background thread which processes the fixture request queue"""
        while True:
            try:
                with self._process_cv:
                    # Check for pending events/stop while holding the condition lock, so that a
                    # trigger_processor() issued between the check and the wait cannot be missed.
                    if self._events == 0 and not self._stop:
                        # Wait until next process interval, or we are notified of a change, whichever comes first
                        logger.debug("Waiting for event or process interval")
                        if self._process_cv.wait(
                                timeout=self.process_interval):
                            logger.debug("Notified of change event")
                        else:
                            logger.debug(
                                "%s seconds elapsed. Forcing processing",
                                self.process_interval)
                if self._stop:
                    logger.debug("Stop requested. Exiting request processor")
                    return
                with self._process_cv:
                    logger.debug(
                        "%s events occurred since last processing time",
                        self._events)
                    self._events = 0
                self.process_requests()
            except Exception:
                logger.exception("Request processor failed")

    def update_service_object(self, fix_req):
        """Updates the service object and adds the instances to its 'fixtures' field. This is best effort:
        any failure is logged and swallowed.

        :param fix_req: the FixtureRequest whose assignment should be recorded on the service
        """
        logger.info("Updating service %s with assigned instances",
                    fix_req.root_workflow_id)
        try:
            # NOTE: since request processor is single threaded, it is safe to update the service object
            # without a lock, and we are not worried about concurrent updates with axops since we are
            # only updating a single column (fixtures).
            service = self.axdb_client.get_service(fix_req.root_workflow_id)
            # deserialize the json
            service_fixtures = {
                instance_id: json.loads(serialized_fixture_doc)
                for instance_id, serialized_fixture_doc in (
                    service.get('fixtures') or {}).items()
            }

            for ref_name, assignment in fix_req.assignment.items():
                # Copy so that additions below do not leak into the request's own assignment
                assignment = copy.deepcopy(assignment)
                instance_id = assignment['id']
                if instance_id not in service_fixtures:
                    service_fixtures[instance_id] = assignment
                if fix_req.requester == common.FIX_REQUESTER_AXWORKFLOWADC:
                    # 'service_ids' is a field added specially for the UI so that it can distinguish which
                    # steps utilized which fixtures. We only want to add this for workflows and not
                    # deployments, since service_id means deployment_id in the context of deployments.
                    service_ids = service_fixtures[instance_id].get(
                        'service_ids', [])
                    service_id_dict = {
                        'service_id': fix_req.service_id,
                        'reference_name': ref_name
                    }
                    if service_id_dict not in service_ids:
                        service_ids.append(service_id_dict)
                    service_fixtures[instance_id]['service_ids'] = service_ids

            logger.info("Updating service object with fixture assignment:\n%s",
                        pretty_json(service_fixtures))
            # serialize the json before storing
            serialized_fixtures = {
                instance_id: json.dumps(deserialized_fixture_doc)
                for instance_id, deserialized_fixture_doc in
                service_fixtures.items()
            }
            payload = {
                'template_name': service['template_name'],
                'fixtures': serialized_fixtures,
                'ax_update_if_exist': "",
            }
            self.axdb_client.update_service(service['task_id'], payload)
        except Exception:
            logger.exception(
                "Failed to update %s service object with fixture assignment",
                fix_req.root_workflow_id)

    def start_processor(self):
        """Start the background request processing thread"""
        with self._processor_lock:
            if self._request_processor_thread is None:
                logger.info("Request processor starting")
                self._request_processor_thread = threading.Thread(
                    target=self._request_processor,
                    name="request_processor",
                    daemon=True)
                self._request_processor_thread.start()
            else:
                logger.info("Request processor already started")

    def stop_processor(self):
        """Stop the request processor thread if running"""
        with self._processor_lock:
            processor_thread = self._request_processor_thread
            if processor_thread is None:
                logger.info("Request processor already stopped")
                return
            logger.info("Request processor stopping")
            self._stop = True
        # Wake up and join the thread WITHOUT holding the processor lock: the thread may be
        # about to enter process_requests(), which needs that lock, and would otherwise
        # never be able to finish while we block in join().
        self.trigger_processor()
        processor_thread.join()
        with self._processor_lock:
            self._request_processor_thread = None
            self._stop = False
        logger.info("Request processor stopped")

    def trigger_processor(self):
        """Internal trigger to notify request processor to process the request table"""
        with self._process_cv:
            self._events += 1
            self._process_cv.notify()