def run(self):
    """Stand up a partial dart environment from the input configuration."""
    _logger.info('reading configuration...')
    # operate on a private deep copy so the cached configuration object
    # returned by configuration() is never mutated
    cfg = copy.deepcopy(
        configuration(self.input_config_path, suppress_decryption=True))
    host = self._get_dart_host(cfg)
    _logger.info('setting up new dart partial environment: %s' % host)
    self.create_partial(cfg)
    _logger.info('partial environment created with config: %s, url: %s'
                 % (self.output_config_s3_path, host))
def __init__(self, logger, configure_app_context=True):
    """Load dart configuration from the DART_CONFIG env var and wire up logging.

    :param logger: logger used to report startup progress/errors
    :param configure_app_context: when True, also build the web AppContext

    :raises ValueError: if the DART_CONFIG environment variable is unset/empty
    """
    # Fail fast with a clear message instead of an opaque KeyError when the
    # env var is missing (matches the validation done by sibling entry points).
    config_path = os.getenv('DART_CONFIG')
    if not config_path:
        logger.error("missing DART_CONFIG env variable")
        raise ValueError("missing DART_CONFIG env variable")
    self.dart_config = configuration(config_path)
    logging.config.dictConfig(self.dart_config['logging'])
    set_dart_environment_variables(self.dart_config['dart'].get('ecs_agent_data_path'))
    logger.info('loaded config from path: %s' % config_path)
    if configure_app_context:
        self.app_context = AppContext(self.dart_config, ['dart.web'])
def run(self):
    """Create a partial dart environment and report where its config lives."""
    _logger.info('reading configuration...')
    # deep-copy so downstream mutation of the config can't leak back into
    # the shared configuration cache
    env_config = copy.deepcopy(
        configuration(self.input_config_path, suppress_decryption=True))
    dart_host = _get_dart_host(env_config)
    _logger.info('setting up new dart partial environment: %s' % dart_host)
    self.create_partial(env_config)
    _logger.info('partial environment created with config: %s, url: %s'
                 % (self.output_config_s3_path, dart_host))
def __init__(self, logger, configure_app_context=True):
    """Bootstrap from the DART_CONFIG env var; raise if it is not set."""
    config_path = os.getenv('DART_CONFIG')
    # guard clause: bail out early rather than nesting the happy path
    if not config_path:
        logger.error("missing DART_CONFIG env variable")
        raise ValueError("missing DART_CONFIG env variable")
    self.dart_config = configuration(config_path)
    logging.config.dictConfig(self.dart_config['logging'])
    set_dart_environment_variables(self.dart_config['dart'].get('ecs_agent_data_path'))
    logger.info('loaded config from path: %s' % config_path)
    if configure_app_context:
        self.app_context = AppContext(self.dart_config, ['dart.web'])
def __init__(self, logger, configure_app_context=True):
    """Load config named by DART_CONFIG, configure logging, optionally build the AppContext."""
    path = os.getenv('DART_CONFIG')
    if path:
        self.dart_config = configuration(path)
        logging.config.dictConfig(self.dart_config['logging'])
        # propagate ECS agent data path (may be None) into process env vars
        agent_data_path = self.dart_config['dart'].get('ecs_agent_data_path')
        set_dart_environment_variables(agent_data_path)
        logger.info('loaded config from path: %s' % path)
        if configure_app_context:
            self.app_context = AppContext(self.dart_config, ['dart.web'])
    else:
        logger.error("missing DART_CONFIG env variable")
        raise ValueError("missing DART_CONFIG env variable")
onelogin_server=config.get('auth').get('onelogin_server'), private_key=config.get('auth').get('private_key'), x509cert=config.get('auth').get('x509cert'), dart_server=config.get('auth').get('dart_server') ) f = open('./ui/onelogin' + '/settings.json', 'w') f.write(file_str) f.close() api_version_prefix = '/api/1' config_path = os.environ['DART_CONFIG'] config = configuration(config_path) logging.config.dictConfig(config['logging']) _logger.info('loaded config from path: %s' % config_path) app = Flask(__name__, template_folder='ui/templates', static_folder='ui/static') app.dart_context = AppContext( config=config, exclude_injectable_module_paths=[ 'dart.message.engine_listener', 'dart.message.trigger_listener', 'dart.message.subscription_listener' ] )
'type': 'string', 'minLength': 1, 'maxLength': 256, 'pattern': '^[a-zA-Z0-9]+[a-zA-Z0-9\-\.]*\.es\.amazonaws\.com$', 'description': 'The AWS Elasticsearch domain endpoint that you use to submit index and search requests.' }, }, 'additionalProperties': False, 'required': ['endpoint'] }, supported_action_types=[ ElasticsearchActionTypes.data_check, ElasticsearchActionTypes.create_index, ElasticsearchActionTypes.create_mapping, ElasticsearchActionTypes.create_template, ElasticsearchActionTypes.delete_index, ElasticsearchActionTypes.delete_template, ElasticsearchActionTypes.force_merge_index, ], ecs_task_definition=ecs_task_definition))) _logger.info('saved elasticsearch_engine: %s' % e1.id) if __name__ == '__main__': add_elasticsearch_engine(configuration(os.environ['DART_CONFIG']))
def run(self):
    """Create a full dart environment.

    Provisions the iam/sns/s3/logs CloudFormation stacks, the KMS key used
    for secrets, and the ECR repos; threads every generated resource
    identifier back into the output configuration; then delegates to
    ``create_partial`` for the remaining stacks.

    :raises Exception: if the s3 config/data buckets already exist
    """
    _logger.info('reading configuration...')
    # deep-copy so the mutations below never leak into the cached config
    output_config = copy.deepcopy(
        configuration(self.input_config_path, suppress_decryption=True))
    dart_host = _get_dart_host(output_config)
    _logger.info('setting up new dart full environment: %s' % dart_host)

    _logger.info('verifying s3 buckets do not exist')
    config_bucket_name = output_config['s3']['config_bucket']
    data_bucket_name = output_config['s3']['data_bucket']
    if 's3' not in self.stacks_to_skip:
        # S3 bucket names are case-insensitive, so compare case-folded
        iequals = lambda s1, s2: s1.lower() == s2.lower()
        buckets = [b['Name'] for b in boto3.client('s3').list_buckets()['Buckets']]
        matches = [b for b in buckets
                   if iequals(b, config_bucket_name) or iequals(b, data_bucket_name)]
        if len(matches) > 0:
            raise Exception('s3 config and/or data bucket already exists!')

    _logger.info('creating initial stacks')
    aws_account_id = output_config['dart']['aws_account']
    replacements = {
        '{DART_REGION}': output_config['dart']['region'],
        '{DART_AWS_ACCOUNT}': aws_account_id,
        '{DART_QUEUE_PREFIX}': output_config['dart']['queue_prefix'],
        '{DART_CONFIG_BUCKET}': output_config['s3']['config_bucket'],
        '{DART_DATA_BUCKET}': output_config['s3']['data_bucket'],
    }
    iam_stack_name = self._create_stack('iam', output_config, replacements)
    sns_stack_name = self._create_stack('sns', output_config)

    _logger.info('waiting for stack completion')
    iam_outputs = self._wait_for_stack_completion_and_get_outputs(iam_stack_name, 7)
    sns_outputs = self._wait_for_stack_completion_and_get_outputs(sns_stack_name, 1)
    uds_inpf_role = _get_element(iam_outputs, 'OutputKey', 'UdsInstanceProfileRole')['OutputValue']
    uds_ec2_inpf = _get_element(iam_outputs, 'OutputKey', 'UdsEc2InstanceProfile')['OutputValue']
    uds_ec2_inpf_role = _get_element(iam_outputs, 'OutputKey', 'UdsEc2InstanceProfileRole')['OutputValue']
    ecs_container_inpf = _get_element(iam_outputs, 'OutputKey', 'EcsContainerInstanceProfile')['OutputValue']
    ecs_container_inpf_role = _get_element(iam_outputs, 'OutputKey', 'EcsContainerInstanceProfileRole')['OutputValue']
    ecs_service_role = _get_element(iam_outputs, 'OutputKey', 'EcsServiceRole')['OutputValue']
    sns_arn = sns_outputs[0]['OutputValue']

    _logger.info('updating configuration with sns arn')
    self._set_cfn_boto_param_value(output_config, 'logs', 'AlarmActions', sns_arn)

    _logger.info('updating configuration with subscription queue urls/arns')
    subscription_queue_arn, subscription_queue_url = self._ensure_queue_exists(output_config, 'subscription_queue')
    s3_params = output_config['cloudformation_stacks']['s3']['boto_args']['Parameters']
    _get_element(s3_params, 'ParameterKey', 'DartConfigBucket')['ParameterValue'] = config_bucket_name
    _get_element(s3_params, 'ParameterKey', 'DartDataBucket')['ParameterValue'] = data_bucket_name
    _get_element(s3_params, 'ParameterKey', 'SubscriptionQueueUrl')['ParameterValue'] = subscription_queue_url
    _get_element(s3_params, 'ParameterKey', 'SubscriptionQueueArn')['ParameterValue'] = subscription_queue_arn

    _logger.info('creating s3 and logs stacks')
    s3_stack_name = self._create_stack('s3', output_config)
    logs_stack_name = self._create_stack('logs', output_config)

    _logger.info('creating/updating kms key')
    with open(dart_root_relative_path(output_config['kms']['key_policy_template'])) as f:
        policy = json.load(f)
    kms_authorized_users = [self._role_arn(ecs_container_inpf_role, aws_account_id)]
    kms_authorized_users.extend(output_config['dart']['kms_key_user_arns'])
    # statement order is fixed by the template: 0=root, 1=admins, 2/3=users
    policy['Statement'][0]['Principal']['AWS'] = 'arn:aws:iam::%s:root' % aws_account_id
    policy['Statement'][1]['Principal']['AWS'] = output_config['dart']['kms_key_admin_arns']
    policy['Statement'][2]['Principal']['AWS'] = kms_authorized_users
    policy['Statement'][3]['Principal']['AWS'] = kms_authorized_users
    policy_text = json.dumps(policy)
    kms_client = boto3.client('kms')
    key_arn = output_config['kms']['key_arn']
    if key_arn and key_arn != '...TBD...':
        # key already provisioned: just refresh its policy
        kms_client.put_key_policy(KeyId=key_arn, PolicyName='default', Policy=policy_text)
    else:
        key_arn = kms_client.create_key(Policy=policy_text)['KeyMetadata']['Arn']
        alias = 'alias/dart-%s-secrets' % self.environment_name
        kms_client.create_alias(AliasName=alias, TargetKeyId=key_arn)

    _logger.info('updating configuration with kms key arn and secrets path, etc')
    output_config['engines']['redshift_engine']['options']['kms_key_arn'] = key_arn
    secrets_config = get_secrets_config(output_config)
    values = (config_bucket_name, self.environment_name)
    secrets_s3_path = 's3://%s/secrets/%s' % values
    secrets_config['secrets_s3_path'] = secrets_s3_path
    secrets_config['kms_key_arn'] = key_arn
    eng_cfg = output_config['engines']
    eng_cfg['redshift_engine']['options']['secrets_s3_path'] = secrets_s3_path
    output_config['dart']['s3_datastores_root'] = 's3://%s/datastores/%s' % values

    _logger.info('updating configuration with iam profiles/roles')
    output_config['engines']['emr_engine']['options']['instance_profile'] = uds_ec2_inpf
    output_config['engines']['emr_engine']['options']['service_role'] = uds_inpf_role
    output_config['engines']['dynamodb_engine']['options']['emr_instance_profile'] = uds_ec2_inpf
    output_config['engines']['dynamodb_engine']['options']['emr_service_role'] = uds_inpf_role
    self._set_cfn_boto_param_value(output_config, 'engine-taskrunner', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'engine-worker', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'WebEcsServiceRoleName', ecs_service_role)
    self._set_cfn_boto_param_value(output_config, 'web', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'web', 'WebEcsServiceRoleName', ecs_service_role)

    _logger.info('creating ECR repos')
    ecr_client = boto3.client('ecr')
    all_repo_names = [self._full_repo_name(r, output_config) for r in output_config['ecr']['repo_names']]
    existing_repo_names = []
    for repo_name in all_repo_names:
        try:
            ecr_client.describe_repositories(repositoryNames=[repo_name])
            existing_repo_names.append(repo_name)
        except ClientError as e:
            # "not found" just means we need to create it below
            if e.response['Error']['Code'] == 'RepositoryNotFoundException':
                continue
            raise e
    missing_repo_names = set(all_repo_names) - set(existing_repo_names)
    with open(dart_root_relative_path(output_config['ecr']['policy_template'])) as f:
        initial_policy = json.load(f)
    initial_policy['Statement'][0]['Principal']['AWS'] = output_config['dart']['ecr_authorized_user_arns']
    initial_policy_text = json.dumps(initial_policy)
    for repo_name in missing_repo_names:
        ecr_client.create_repository(repositoryName=repo_name)
        ecr_client.set_repository_policy(repositoryName=repo_name, policyText=initial_policy_text)

    _logger.info('updating ECR repo policies')
    ecr_policy_statement_sid = 'dart-%s-ecs-and-uds-permissions' % self.environment_name
    ecs_container_inpf_role_arn = self._role_arn(ecs_container_inpf_role, aws_account_id)
    uds_ec2_inpf_role_arn = self._role_arn(uds_ec2_inpf_role, aws_account_id)
    for repo_name in all_repo_names:
        policy = json.loads(ecr_client.get_repository_policy(repositoryName=repo_name)['policyText'])
        exists_index = None
        for i, statement in enumerate(policy['Statement']):
            # .get(): statements from the initial template may lack a Sid
            if statement.get('Sid') == ecr_policy_statement_sid:
                exists_index = i
        # BUG FIX: index 0 is falsy, so the old `if exists_index:` failed to
        # remove a stale statement when it was first in the list, producing a
        # duplicate Sid on re-run
        if exists_index is not None:
            policy['Statement'].pop(exists_index)
        policy['Statement'].append({
            'Sid': ecr_policy_statement_sid,
            'Effect': 'Allow',
            'Principal': {'AWS': [ecs_container_inpf_role_arn, uds_ec2_inpf_role_arn]},
            'Action': [
                'ecr:GetDownloadUrlForLayer',
                'ecr:BatchGetImage',
                'ecr:BatchCheckLayerAvailability',
                'ecr:GetAuthorizationToken'
            ]
        })
        policy_text = json.dumps(policy)
        ecr_client.set_repository_policy(repositoryName=repo_name, policyText=policy_text)

    _logger.info('updating configuration with docker image references')
    output_config['local_setup']['elasticmq_docker_image'] = self._docker_image('elasticmq', output_config)
    eng_cfg['no_op_engine']['docker_image'] = self._docker_image('engine-no_op', output_config)
    eng_cfg['emr_engine']['docker_image'] = self._docker_image('engine-emr', output_config)
    eng_cfg['emr_engine']['options']['impala_docker_repo_base_url'] = self._ecr_base_url(output_config)
    eng_cfg['dynamodb_engine']['docker_image'] = self._docker_image('engine-dynamodb', output_config)
    eng_cfg['dynamodb_engine']['options']['emr_impala_docker_repo_base_url'] = self._ecr_base_url(output_config)
    eng_cfg['redshift_engine']['docker_image'] = self._docker_image('engine-redshift', output_config)
    ew_image = self._docker_image('engine-worker', output_config)
    sw_image = self._docker_image('subscription-worker', output_config)
    tw_image = self._docker_image('trigger-worker', output_config)
    fl_image = self._docker_image('flask', output_config)
    nx_image = self._docker_image('nginx', output_config)
    cwl_image = self._docker_image('cloudwatchlogs', output_config)
    self._set_cfn_boto_param_value(output_config, 'engine-taskrunner', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'engine-worker', 'EngineWorkerDockerImage', ew_image)
    self._set_cfn_boto_param_value(output_config, 'engine-worker', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'SubscriptionWorkerDockerImage', sw_image)
    self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'TriggerWorkerDockerImage', tw_image)
    self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'FlaskWorkerDockerImage', fl_image)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'NginxWorkerDockerImage', nx_image)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'web', 'FlaskWorkerDockerImage', fl_image)
    self._set_cfn_boto_param_value(output_config, 'web', 'NginxWorkerDockerImage', nx_image)
    self._set_cfn_boto_param_value(output_config, 'web', 'CloudWatchLogsDockerImage', cwl_image)

    _logger.info('updating configuration with DartConfig references')
    self._set_cfn_boto_param_value(output_config, 'engine-worker', 'DartConfig', self.output_config_s3_path)
    self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'DartConfig', self.output_config_s3_path)
    self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'DartConfig', self.output_config_s3_path)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'DartConfig', self.output_config_s3_path)
    self._set_cfn_boto_param_value(output_config, 'web', 'DartConfig', self.output_config_s3_path)
    eng_cfg['no_op_engine']['config'] = self.output_config_s3_path
    eng_cfg['emr_engine']['config'] = self.output_config_s3_path
    eng_cfg['dynamodb_engine']['config'] = self.output_config_s3_path
    eng_cfg['redshift_engine']['config'] = self.output_config_s3_path

    _logger.info('waiting for logs stack')
    logs_outputs = self._wait_for_stack_completion_and_get_outputs(logs_stack_name, 2)
    syslog_log_group_name = _get_element(logs_outputs, 'OutputKey', 'DartSyslog')['OutputValue']
    misc_log_group_name = _get_element(logs_outputs, 'OutputKey', 'DartMisc')['OutputValue']
    self._handle_docker_concerns(cwl_image, eng_cfg, misc_log_group_name, output_config, syslog_log_group_name)

    _logger.info('waiting for s3 stack')
    self._wait_for_stack_completion_and_get_outputs(s3_stack_name)
    self.create_partial(output_config)
    _logger.info('full environment created with config: %s, url: %s'
                 % (self.output_config_s3_path, dart_host))
'pattern': '^[a-zA-Z0-9_]+$', 'description': 'overrides dataset setting' }, 'target_distribution_key': { 'type': ['string', 'null'], 'default': None, 'pattern': '^[a-zA-Z0-9_]+$', 'description': 'overrides dataset setting' }, 'target_sort_key': { 'type': ['string', 'null'], 'default': None, 'pattern': '^[a-zA-Z0-9_]+$', 'description': 'overrides dataset setting' }, }, 'additionalProperties': False, 'required': ['dataset_id'], }, supported_action_types=[ DynamoDBActionTypes.create_table, DynamoDBActionTypes.delete_table, DynamoDBActionTypes.load_dataset, ], ecs_task_definition=ecs_task_definition))) _logger.info('saved dynamodb_engine: %s' % e1.id) if __name__ == '__main__': add_dynamodb_engine(configuration(os.environ['DART_CONFIG']))
'description': 'the secret_access_key for accessing this elasticsearch cluster. ' + 'Leave blank to use Dart\'s instance profile credentials' }, 'endpoint': { 'type': 'string', 'minLength': 1, 'maxLength': 256, 'pattern': '^[a-zA-Z0-9]+[a-zA-Z0-9\-\.]*\.es\.amazonaws\.com$', 'description': 'The AWS Elasticsearch domain endpoint that you use to submit index and search requests.' }, }, 'additionalProperties': False, 'required': ['endpoint'] }, supported_action_types=[ ElasticsearchActionTypes.data_check, ElasticsearchActionTypes.create_index, ElasticsearchActionTypes.create_mapping, ElasticsearchActionTypes.create_template, ElasticsearchActionTypes.delete_index, ElasticsearchActionTypes.delete_template, ElasticsearchActionTypes.force_merge_index, ], ecs_task_definition=ecs_task_definition ))) _logger.info('saved elasticsearch_engine: %s' % e1.id) if __name__ == '__main__': add_elasticsearch_engine(configuration(os.environ['DART_CONFIG']))
Action(id=Ref.action(7), data=ActionData( name=NoOpActionTypes.action_that_fails.name, action_type_name=NoOpActionTypes.action_that_fails. name, engine_name='no_op_engine', workflow_id=Ref.workflow(2), order_idx=3, state=ActionState.TEMPLATE, )), ], triggers=[ Trigger(id=Ref.trigger(1), data=TriggerData( name='no-op-trigger-workflow-completion', trigger_type_name=workflow_completion_trigger.name, workflow_ids=[Ref.workflow(2)], state=TriggerState.ACTIVE, args={'completed_workflow_id': Ref.workflow(1)})), ], )) ] for e in subgraph_definitions: s = dart.save_subgraph_definition(e, engine_id) _logger.info('created subgraph_definition: %s' % s.id) if __name__ == '__main__': add_no_op_engine_sub_graphs(configuration(os.environ['DART_CONFIG']))
def run(self):
    """Create a full dart environment.

    Provisions the iam/sns/s3/logs CloudFormation stacks, the KMS key used
    for secrets, and the ECR repos; threads every generated resource
    identifier back into the output configuration; then delegates to
    ``create_partial`` for the remaining stacks.

    :raises Exception: if the s3 config/data buckets already exist
    """
    _logger.info('reading configuration...')
    # deep-copy so the mutations below never leak into the cached config
    output_config = copy.deepcopy(
        configuration(self.input_config_path, suppress_decryption=True))
    dart_host = _get_dart_host(output_config)
    _logger.info('setting up new dart full environment: %s' % dart_host)

    _logger.info('verifying s3 buckets do not exist')
    config_bucket_name = output_config['s3']['config_bucket']
    data_bucket_name = output_config['s3']['data_bucket']
    if 's3' not in self.stacks_to_skip:
        # S3 bucket names are case-insensitive, so compare case-folded
        iequals = lambda s1, s2: s1.lower() == s2.lower()
        buckets = [b['Name'] for b in boto3.client('s3').list_buckets()['Buckets']]
        matches = [b for b in buckets
                   if iequals(b, config_bucket_name) or iequals(b, data_bucket_name)]
        if len(matches) > 0:
            raise Exception('s3 config and/or data bucket already exists!')

    _logger.info('creating initial stacks')
    aws_account_id = output_config['dart']['aws_account']
    replacements = {
        '{DART_REGION}': output_config['dart']['region'],
        '{DART_AWS_ACCOUNT}': aws_account_id,
        '{DART_QUEUE_PREFIX}': output_config['dart']['queue_prefix'],
        '{DART_CONFIG_BUCKET}': output_config['s3']['config_bucket'],
        '{DART_DATA_BUCKET}': output_config['s3']['data_bucket'],
    }
    iam_stack_name = self._create_stack('iam', output_config, replacements)
    sns_stack_name = self._create_stack('sns', output_config)

    _logger.info('waiting for stack completion')
    iam_outputs = self._wait_for_stack_completion_and_get_outputs(iam_stack_name, 7)
    sns_outputs = self._wait_for_stack_completion_and_get_outputs(sns_stack_name, 1)
    uds_inpf_role = _get_element(iam_outputs, 'OutputKey', 'UdsInstanceProfileRole')['OutputValue']
    uds_ec2_inpf = _get_element(iam_outputs, 'OutputKey', 'UdsEc2InstanceProfile')['OutputValue']
    uds_ec2_inpf_role = _get_element(iam_outputs, 'OutputKey', 'UdsEc2InstanceProfileRole')['OutputValue']
    ecs_container_inpf = _get_element(iam_outputs, 'OutputKey', 'EcsContainerInstanceProfile')['OutputValue']
    ecs_container_inpf_role = _get_element(iam_outputs, 'OutputKey', 'EcsContainerInstanceProfileRole')['OutputValue']
    ecs_service_role = _get_element(iam_outputs, 'OutputKey', 'EcsServiceRole')['OutputValue']
    sns_arn = sns_outputs[0]['OutputValue']

    _logger.info('updating configuration with sns arn')
    self._set_cfn_boto_param_value(output_config, 'logs', 'AlarmActions', sns_arn)

    _logger.info('updating configuration with subscription queue urls/arns')
    subscription_queue_arn, subscription_queue_url = self._ensure_queue_exists(output_config, 'subscription_queue')
    s3_params = output_config['cloudformation_stacks']['s3']['boto_args']['Parameters']
    _get_element(s3_params, 'ParameterKey', 'DartConfigBucket')['ParameterValue'] = config_bucket_name
    _get_element(s3_params, 'ParameterKey', 'DartDataBucket')['ParameterValue'] = data_bucket_name
    _get_element(s3_params, 'ParameterKey', 'SubscriptionQueueUrl')['ParameterValue'] = subscription_queue_url
    _get_element(s3_params, 'ParameterKey', 'SubscriptionQueueArn')['ParameterValue'] = subscription_queue_arn

    _logger.info('creating s3 and logs stacks')
    s3_stack_name = self._create_stack('s3', output_config)
    logs_stack_name = self._create_stack('logs', output_config)

    _logger.info('creating/updating kms key')
    with open(dart_root_relative_path(output_config['kms']['key_policy_template'])) as f:
        policy = json.load(f)
    kms_authorized_users = [self._role_arn(ecs_container_inpf_role, aws_account_id)]
    kms_authorized_users.extend(output_config['dart']['kms_key_user_arns'])
    # statement order is fixed by the template: 0=root, 1=admins, 2/3=users
    policy['Statement'][0]['Principal']['AWS'] = 'arn:aws:iam::%s:root' % aws_account_id
    policy['Statement'][1]['Principal']['AWS'] = output_config['dart']['kms_key_admin_arns']
    policy['Statement'][2]['Principal']['AWS'] = kms_authorized_users
    policy['Statement'][3]['Principal']['AWS'] = kms_authorized_users
    policy_text = json.dumps(policy)
    kms_client = boto3.client('kms')
    key_arn = output_config['kms']['key_arn']
    if key_arn and key_arn != '...TBD...':
        # key already provisioned: just refresh its policy
        kms_client.put_key_policy(KeyId=key_arn, PolicyName='default', Policy=policy_text)
    else:
        key_arn = kms_client.create_key(Policy=policy_text)['KeyMetadata']['Arn']
        alias = 'alias/dart-%s-secrets' % self.environment_name
        kms_client.create_alias(AliasName=alias, TargetKeyId=key_arn)

    _logger.info('updating configuration with kms key arn and secrets path, etc')
    output_config['engines']['redshift_engine']['options']['kms_key_arn'] = key_arn
    secrets_config = get_secrets_config(output_config)
    values = (config_bucket_name, self.environment_name)
    secrets_s3_path = 's3://%s/secrets/%s' % values
    secrets_config['secrets_s3_path'] = secrets_s3_path
    secrets_config['kms_key_arn'] = key_arn
    eng_cfg = output_config['engines']
    eng_cfg['redshift_engine']['options']['secrets_s3_path'] = secrets_s3_path
    output_config['dart']['s3_datastores_root'] = 's3://%s/datastores/%s' % values

    _logger.info('updating configuration with iam profiles/roles')
    output_config['engines']['emr_engine']['options']['instance_profile'] = uds_ec2_inpf
    output_config['engines']['emr_engine']['options']['service_role'] = uds_inpf_role
    output_config['engines']['dynamodb_engine']['options']['emr_instance_profile'] = uds_ec2_inpf
    output_config['engines']['dynamodb_engine']['options']['emr_service_role'] = uds_inpf_role
    self._set_cfn_boto_param_value(output_config, 'engine-taskrunner', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'engine-worker', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'WebEcsServiceRoleName', ecs_service_role)
    self._set_cfn_boto_param_value(output_config, 'web', 'IamInstanceProfile', ecs_container_inpf)
    self._set_cfn_boto_param_value(output_config, 'web', 'WebEcsServiceRoleName', ecs_service_role)

    _logger.info('creating ECR repos')
    ecr_client = boto3.client('ecr')
    all_repo_names = [self._full_repo_name(r, output_config) for r in output_config['ecr']['repo_names']]
    existing_repo_names = []
    for repo_name in all_repo_names:
        try:
            ecr_client.describe_repositories(repositoryNames=[repo_name])
            existing_repo_names.append(repo_name)
        except ClientError as e:
            # "not found" just means we need to create it below
            if e.response['Error']['Code'] == 'RepositoryNotFoundException':
                continue
            raise e
    missing_repo_names = set(all_repo_names) - set(existing_repo_names)
    with open(dart_root_relative_path(output_config['ecr']['policy_template'])) as f:
        initial_policy = json.load(f)
    initial_policy['Statement'][0]['Principal']['AWS'] = output_config['dart']['ecr_authorized_user_arns']
    initial_policy_text = json.dumps(initial_policy)
    for repo_name in missing_repo_names:
        ecr_client.create_repository(repositoryName=repo_name)
        ecr_client.set_repository_policy(repositoryName=repo_name, policyText=initial_policy_text)

    _logger.info('updating ECR repo policies')
    ecr_policy_statement_sid = 'dart-%s-ecs-and-uds-permissions' % self.environment_name
    ecs_container_inpf_role_arn = self._role_arn(ecs_container_inpf_role, aws_account_id)
    uds_ec2_inpf_role_arn = self._role_arn(uds_ec2_inpf_role, aws_account_id)
    for repo_name in all_repo_names:
        policy = json.loads(ecr_client.get_repository_policy(repositoryName=repo_name)['policyText'])
        exists_index = None
        for i, statement in enumerate(policy['Statement']):
            # .get(): statements from the initial template may lack a Sid
            if statement.get('Sid') == ecr_policy_statement_sid:
                exists_index = i
        # BUG FIX: index 0 is falsy, so the old `if exists_index:` failed to
        # remove a stale statement when it was first in the list, producing a
        # duplicate Sid on re-run
        if exists_index is not None:
            policy['Statement'].pop(exists_index)
        policy['Statement'].append({
            'Sid': ecr_policy_statement_sid,
            'Effect': 'Allow',
            'Principal': {'AWS': [ecs_container_inpf_role_arn, uds_ec2_inpf_role_arn]},
            'Action': [
                'ecr:GetDownloadUrlForLayer',
                'ecr:BatchGetImage',
                'ecr:BatchCheckLayerAvailability',
                'ecr:GetAuthorizationToken'
            ]
        })
        policy_text = json.dumps(policy)
        ecr_client.set_repository_policy(repositoryName=repo_name, policyText=policy_text)

    _logger.info('updating configuration with docker image references')
    output_config['local_setup']['elasticmq_docker_image'] = self._docker_image('elasticmq', output_config)
    eng_cfg['no_op_engine']['docker_image'] = self._docker_image('engine-no_op', output_config)
    eng_cfg['emr_engine']['docker_image'] = self._docker_image('engine-emr', output_config)
    eng_cfg['emr_engine']['options']['impala_docker_repo_base_url'] = self._ecr_base_url(output_config)
    eng_cfg['dynamodb_engine']['docker_image'] = self._docker_image('engine-dynamodb', output_config)
    eng_cfg['dynamodb_engine']['options']['emr_impala_docker_repo_base_url'] = self._ecr_base_url(output_config)
    eng_cfg['redshift_engine']['docker_image'] = self._docker_image('engine-redshift', output_config)
    ew_image = self._docker_image('engine-worker', output_config)
    sw_image = self._docker_image('subscription-worker', output_config)
    tw_image = self._docker_image('trigger-worker', output_config)
    fl_image = self._docker_image('flask', output_config)
    nx_image = self._docker_image('nginx', output_config)
    cwl_image = self._docker_image('cloudwatchlogs', output_config)
    self._set_cfn_boto_param_value(output_config, 'engine-taskrunner', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'engine-worker', 'EngineWorkerDockerImage', ew_image)
    self._set_cfn_boto_param_value(output_config, 'engine-worker', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'SubscriptionWorkerDockerImage', sw_image)
    self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'TriggerWorkerDockerImage', tw_image)
    self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'FlaskWorkerDockerImage', fl_image)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'NginxWorkerDockerImage', nx_image)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'CloudWatchLogsDockerImage', cwl_image)
    self._set_cfn_boto_param_value(output_config, 'web', 'FlaskWorkerDockerImage', fl_image)
    self._set_cfn_boto_param_value(output_config, 'web', 'NginxWorkerDockerImage', nx_image)
    self._set_cfn_boto_param_value(output_config, 'web', 'CloudWatchLogsDockerImage', cwl_image)

    _logger.info('updating configuration with DartConfig references')
    self._set_cfn_boto_param_value(output_config, 'engine-worker', 'DartConfig', self.output_config_s3_path)
    self._set_cfn_boto_param_value(output_config, 'subscription-worker', 'DartConfig', self.output_config_s3_path)
    self._set_cfn_boto_param_value(output_config, 'trigger-worker', 'DartConfig', self.output_config_s3_path)
    self._set_cfn_boto_param_value(output_config, 'web-internal', 'DartConfig', self.output_config_s3_path)
    self._set_cfn_boto_param_value(output_config, 'web', 'DartConfig', self.output_config_s3_path)
    eng_cfg['no_op_engine']['config'] = self.output_config_s3_path
    eng_cfg['emr_engine']['config'] = self.output_config_s3_path
    eng_cfg['dynamodb_engine']['config'] = self.output_config_s3_path
    eng_cfg['redshift_engine']['config'] = self.output_config_s3_path

    _logger.info('waiting for logs stack')
    logs_outputs = self._wait_for_stack_completion_and_get_outputs(logs_stack_name, 2)
    syslog_log_group_name = _get_element(logs_outputs, 'OutputKey', 'DartSyslog')['OutputValue']
    misc_log_group_name = _get_element(logs_outputs, 'OutputKey', 'DartMisc')['OutputValue']
    self._handle_docker_concerns(cwl_image, eng_cfg, misc_log_group_name, output_config, syslog_log_group_name)

    _logger.info('waiting for s3 stack')
    self._wait_for_stack_completion_and_get_outputs(s3_stack_name)
    self.create_partial(output_config)
    _logger.info('full environment created with config: %s, url: %s'
                 % (self.output_config_s3_path, dart_host))
# Flask admin blueprint wiring: loads the dart configuration at import time
# and exposes helpers for (re)populating auth users, api keys and role tables.
from flask.ext.login import login_required
from dart.auth.required_roles import required_roles
from sqlalchemy import text
from dart.context.database import db
from dart.model.mutex import Mutexes, MutexState
from dart.util.rand import random_id
from dart.config.config import configuration
from dart.web.ui.admin.admin_query import populate_dart_client_user, populate_dart_client_apikeys, clear_roles_table,\
    populate_roles_table, populate_user_roles_table, getPermissionServiceRolesAndIds

admin_bp = Blueprint('admin', __name__)

# Configuration is read exactly once, at import time; a missing DART_CONFIG
# environment variable raises KeyError and aborts the import.
CONFIG_PATH = os.environ['DART_CONFIG']
CONFIG = configuration(CONFIG_PATH)
AUTH_CONFIG = CONFIG['auth']
DART_CLIENT_NAME = CONFIG['authorization']['dart_client_name']
PERMISSION_CONFIG = CONFIG['permission_service']

_logger = logging.getLogger(__name__)


def populate_user_api_secret_keys():
    '''
    Under auth.predefined_auth_services we keep a triplet
    <user, api_key, secret_key> that an external service (e.g. portico,
    decode, savor) uses. By being placed in a config file we can have
    different keys for different envs that are always loaded when DART
    starts.
    '''
    # predefined_auth_services is optional; do nothing when absent/empty.
    if AUTH_CONFIG.get('predefined_auth_services'):
        for usr_api_secret in AUTH_CONFIG.get('predefined_auth_services'):
            # each entry is one space-separated string; split into its parts
            items = usr_api_secret.split(" ")
-- anonymous/none user datastore is free for all to edit/run/delete -- anonymous current user cannot do anything but view. * No limit on creating datastores. """ import logging import os from dart.config.config import configuration from flask_login import current_user from flask import make_response, current_app from dart.service.user_roles import UserRolesService from dart.web.api.entity_lookup import get_known_entity from functools import wraps _logger = logging.getLogger(__name__) config_path = os.environ['DART_CONFIG'] config = configuration(config_path) DART_CLIENT_NAME = config['authorization']['dart_client_name'] def dart_required_roles(action_roles): def wrap(f): @wraps(f) def wrapped_f(*args, **kwargs): return f(*args, **kwargs) return wrapped_f return wrap
{ 'containerPath': '/mnt/ecs_agent_data', 'sourceVolume': 'ecs-agent-data', 'readOnly': True } ], } ], 'volumes': [ { 'host': {'sourcePath': '/var/lib/ecs/data'}, 'name': 'ecs-agent-data' } ], } e1 = dart.save_engine(engine=Engine( id=engine_id, data=EngineData( name='s3_engine', description='For S3 FileCopy', options_json_schema={}, supported_action_types=[ S3ActionTypes.copy ], ecs_task_definition=ecs_task_definition ))) _logger.info('Saved s3_engine: %s' % e1.id) if __name__ == '__main__': add_s3_engine(configuration(os.environ['DART_CONFIG']))
'integer', 'default': 2, 'minimum': 1, 'maximum': 10, 'description': 'the maximum number of snapshots to keep, older ones will be deleted' }, }, 'additionalProperties': False, 'required': ['master_user_password'] }, supported_action_types=[ RedshiftActionTypes.start_datastore, RedshiftActionTypes.stop_datastore, RedshiftActionTypes.execute_sql, RedshiftActionTypes.load_dataset, RedshiftActionTypes.consume_subscription, RedshiftActionTypes.copy_to_s3, RedshiftActionTypes.create_snapshot, RedshiftActionTypes.data_check, ], ecs_task_definition=ecs_task_definition))) _logger.info('saved redshift_engine: %s' % e1.id) if __name__ == '__main__': add_redshift_engine(configuration(os.environ['DART_CONFIG']))
'description': 'UTC time when automated cluster maintenance can occur' }, 'snapshot_retention': { 'type': 'integer', 'default': 2, 'minimum': 1, 'maximum': 10, 'description': 'the maximum number of snapshots to keep, older ones will be deleted' }, }, 'additionalProperties': False, 'required': ['master_user_password'] }, supported_action_types=[ RedshiftActionTypes.start_datastore, RedshiftActionTypes.stop_datastore, RedshiftActionTypes.execute_sql, RedshiftActionTypes.load_dataset, RedshiftActionTypes.consume_subscription, RedshiftActionTypes.copy_to_s3, RedshiftActionTypes.create_snapshot, RedshiftActionTypes.data_check, ], ecs_task_definition=ecs_task_definition ))) _logger.info('saved redshift_engine: %s' % e1.id) if __name__ == '__main__': add_redshift_engine(configuration(os.environ['DART_CONFIG']))
import os
from flask.ext.sqlalchemy import SQLAlchemy, Model
from sqlalchemy import create_engine
import sqlalchemy.sql.expression
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import scoped_session, sessionmaker
from dart.config.config import configuration


class DartDbSession(object):
    """Bundle of the declarative base, SQL func helper, and scoped session.

    Mirrors the subset of the flask_sqlalchemy ``SQLAlchemy`` surface that
    the rest of dart uses (``Model``, ``func``, ``session``) so non-web
    processes can share the same call sites.
    """

    def __init__(self, model, func, session):
        self.Model = model
        self.func = func
        self.session = session


def init_dart_db(database_uri_alias='SQLALCHEMY_DATABASE_URI'):
    """Build a standalone (non-Flask) SQLAlchemy session/base pair.

    :param database_uri_alias: key under ``config['flask']`` holding the
        database URI (primary by default; pass the replica alias for a
        read replica connection).
    :return: a :class:`DartDbSession`
    """
    # NOTE: ``config`` is the module-level global assigned below; Python
    # resolves it at call time, so definition order here is fine.
    db_engine = create_engine(config['flask'][database_uri_alias], convert_unicode=True)
    session = scoped_session(sessionmaker(autocommit=False, autoflush=False, bind=db_engine))
    model_base = declarative_base(cls=Model, name='Model')
    # Give models the familiar ``Model.query`` accessor flask_sqlalchemy provides.
    model_base.query = session.query_property()
    return DartDbSession(model_base, sqlalchemy.sql.expression.func, session)


# Loaded once at import; a missing DART_CONFIG env var aborts the import.
config = configuration(os.environ['DART_CONFIG'])

# Web processes get the Flask-managed extension (initialized later via the
# app); every other role gets a standalone session built from the config.
if os.environ.get('DART_ROLE') == 'web':
    db = SQLAlchemy()
    db_replica = SQLAlchemy()
else:
    db = init_dart_db('SQLALCHEMY_DATABASE_URI')
    db_replica = init_dart_db('SQLALCHEMY_DATABASE_REPLICA_URI')
Trigger(id=Ref.trigger(1), data=TriggerData( name='emr-trigger-subscription-1G-batch', trigger_type_name=subscription_batch_trigger.name, workflow_ids=[Ref.workflow(1)], args={ 'subscription_id': Ref.subscription(1), 'unconsumed_data_size_in_bytes': 1000*1000*1000 } )), ], actions=[ Action(id=Ref.action(1), data=ActionData( name='emr-action-consume_subscription', action_type_name=EmrActionTypes.consume_subscription.name, engine_name='emr_engine', workflow_id=Ref.workflow(1), state=ActionState.TEMPLATE, args={'subscription_id': Ref.subscription(1)} )), ] )) ] for e in subgraph_definitions: s = dart.save_subgraph_definition(e, engine_id) _logger.info('created subgraph_definition: %s' % s.id) if __name__ == '__main__': add_emr_engine_sub_graphs(configuration(os.environ['DART_CONFIG']))
e1 = dart.save_engine(Engine(id=engine_id, data=EngineData( name='no_op_engine', description='Helps engineering test dart', options_json_schema={ 'type': 'object', 'properties': { 'action_sleep_time_in_seconds': { 'type': 'integer', 'minimum': 0, 'default': 5, 'description': 'The time to sleep for each action before completing' }, }, 'additionalProperties': False, 'required': [], }, supported_action_types=[ NoOpActionTypes.action_that_succeeds, NoOpActionTypes.action_that_fails, NoOpActionTypes.copy_hdfs_to_s3_action, NoOpActionTypes.load_dataset, NoOpActionTypes.consume_subscription ], ecs_task_definition=ecs_task_definition ))) _logger.info('saved no_op_engine: %s' % e1.id) if __name__ == '__main__': add_no_op_engine(configuration(os.environ['DART_CONFIG']))
} ], } e1 = dart.save_engine(Engine(id=engine_id, data=EngineData( name='dynamodb_engine', description='For DynamoDB tables', options_json_schema={ 'type': 'object', 'properties': { 'dataset_id': {'type': 'string', 'description': 'The id of the dataset on which the table is based'}, 'target_table_name': {'type': ['string', 'null'], 'default': None, 'pattern': '^[a-zA-Z0-9_]+$', 'description': 'overrides dataset setting'}, 'target_distribution_key': {'type': ['string', 'null'], 'default': None, 'pattern': '^[a-zA-Z0-9_]+$', 'description': 'overrides dataset setting'}, 'target_sort_key': {'type': ['string', 'null'], 'default': None, 'pattern': '^[a-zA-Z0-9_]+$', 'description': 'overrides dataset setting'}, }, 'additionalProperties': False, 'required': ['dataset_id'], }, supported_action_types=[ DynamoDBActionTypes.create_table, DynamoDBActionTypes.delete_table, DynamoDBActionTypes.load_dataset, ], ecs_task_definition=ecs_task_definition ))) _logger.info('saved dynamodb_engine: %s' % e1.id) if __name__ == '__main__': add_dynamodb_engine(configuration(os.environ['DART_CONFIG']))