def test_df_to_api_with_parameter_objects_array(self):
    """A list-valued attribute expands into one API attribute per element."""
    definition = self.load_def(
        """{"parameters": [ { "id":"myOccurrenceNumber", "type":"Integer", "description":["part1","part2"], "max":"24" } ]}""")
    expected = [{
        "id": "myOccurrenceNumber",
        "attributes": [
            {"key": "description", "stringValue": "part1"},
            {"key": "description", "stringValue": "part2"},
            {"key": "max", "stringValue": "24"},
            {"key": "type", "stringValue": "Integer"},
        ],
    }]
    self.assertEqual(
        translator.definition_to_api_parameters(definition), expected)
def define_data_pipeline(self, client, pipe_id, emr_core_instances):
    """Load the pipeline JSON definition, translate it to API objects, and
    push it onto the given Data Pipeline.

    :param client: boto3 'datapipeline' client.
    :param pipe_id: id of the pipeline to fill with the definition.
    :param emr_core_instances: number of EMR core instances; 0 selects the
        standalone-cluster definition file.
    :return: the parameter values sent with the definition (after
        update_params), so the caller can reuse them (e.g. for activation).
    """
    import awscli.customizations.datapipeline.translator as trans
    base = self.get_package_path()
    if emr_core_instances != 0:
        # see syntax in datapipeline-dg.pdf p285
        # to add in there: /*"AdditionalMasterSecurityGroups": "#{}", /* To add later to match EMR mode */
        definition_file = base + 'yaetos/definition.json'
    else:
        # TODO: have 1 json for both to avoid having to track duplication.
        definition_file = base + 'yaetos/definition_standalone_cluster.json'

    # Note: Data Pipeline doesn't support emr-6.0.0 yet.
    # Fix: use a context manager so the file handle is always closed
    # (the previous json.load(open(...)) leaked the handle).
    with open(definition_file, 'r') as handle:
        definition = json.load(handle)

    pipelineObjects = trans.definition_to_api_objects(definition)
    parameterObjects = trans.definition_to_api_parameters(definition)
    parameterValues = trans.definition_to_parameter_values(definition)
    parameterValues = self.update_params(parameterValues)
    logger.info('Filled pipeline with data from ' + definition_file)

    response = client.put_pipeline_definition(
        pipelineId=pipe_id,
        pipelineObjects=pipelineObjects,
        parameterObjects=parameterObjects,
        parameterValues=parameterValues)
    logger.info('put_pipeline_definition response: ' + str(response))
    return parameterValues
def deploy(config_file='config.yaml'):
    """Create a pipeline from a config file and upload its full definition.

    Reads AWS credentials and pipeline settings from *config_file* (resolved
    relative to the current working directory), creates the pipeline, then
    translates and pushes the definition, parameter objects and values.
    """
    cfg = read_cfg(os.path.join(os.getcwd(), config_file))

    profile = cfg.get('aws_profile')
    access_key_id = cfg.get('aws_access_key_id')
    secret_access_key = cfg.get('aws_secret_access_key')
    region = cfg.get('region')
    client = get_client(access_key_id, secret_access_key, region, profile)

    created = client.create_pipeline(
        cfg.get('name'), cfg.get('unique_id'), cfg.get('description', ''))
    pipeline_id = created.get('pipelineId')

    parameter_objects = translator.definition_to_api_parameters(
        read_json_file(cfg.get('parameter_objects')))
    parameter_values = translator.definition_to_parameter_values(
        read_json_file(cfg.get('parameter_values')))
    pipeline_definition = translator.definition_to_api_objects(
        read_json_file(cfg.get('pipeline_definition')))

    return client.put_pipeline_definition(
        pipeline_id, pipeline_definition, parameter_objects, parameter_values)
def set_pipeline_definition(self):
    """Translates the json definition and puts it on created pipeline

    Returns:
        dict: the response of the Boto3 command

    Raises:
        DataPipelineDefinitionError: if the stored JSON definition cannot
            be translated into API objects/parameters/values.
    """
    if not self.pipeline_id:
        self.get_pipeline_id()

    json_def = self.datapipeline_data['json_definition']

    try:
        pipelineobjects = translator.definition_to_api_objects(json_def)
        parameterobjects = translator.definition_to_api_parameters(json_def)
        parametervalues = translator.definition_to_parameter_values(json_def)
    except translator.PipelineDefinitionError as error:
        LOG.warning(error)
        # Fix: chain explicitly so the translator error is recorded as the
        # direct cause of the raised exception (PEP 3134).
        raise DataPipelineDefinitionError from error

    response = self.client.put_pipeline_definition(
        pipelineId=self.pipeline_id,
        pipelineObjects=pipelineobjects,
        parameterObjects=parameterobjects,
        parameterValues=parametervalues)
    LOG.debug(response)
    LOG.info("Successfully applied pipeline definition")
    return response
def to_new_definition_objects(new_definition, new_parameter_values):
    """Translate a pipeline definition into put_pipeline_definition kwargs.

    NOTE: when *new_parameter_values* is provided, *new_definition* is
    mutated in place (its 'values' key is overwritten) before translation.
    """
    if new_parameter_values is not None:
        new_definition['values'] = new_parameter_values
    objects = translator.definition_to_api_objects(new_definition)
    params = translator.definition_to_api_parameters(new_definition)
    values = translator.definition_to_parameter_values(new_definition)
    return {
        'pipelineObjects': objects,
        'parameterObjects': params,
        'parameterValues': values,
    }
def add_to_params(self, parameters, value):
    """Parse a JSON pipeline definition string and merge it into *parameters*."""
    if value is None:
        return
    definition = json.loads(value)
    api_objs = translator.definition_to_api_objects(definition)
    param_objs = translator.definition_to_api_parameters(definition)
    param_vals = translator.definition_to_parameter_values(definition)
    parameters["pipelineObjects"] = api_objs
    # Use Parameter objects and values from def if not already provided
    if param_objs is not None and "parameterObjects" not in parameters:
        parameters["parameterObjects"] = param_objs
    if param_vals is not None and "parameterValues" not in parameters:
        parameters["parameterValues"] = param_vals
def add_to_params(self, parameters, value):
    """Parse a JSON pipeline definition string and merge it into *parameters*."""
    if value is None:
        return
    definition = json.loads(value)
    api_objs = translator.definition_to_api_objects(definition)
    param_objs = translator.definition_to_api_parameters(definition)
    param_vals = translator.definition_to_parameter_values(definition)
    parameters['pipelineObjects'] = api_objs
    # Use Parameter objects and values from def if not already provided
    if param_objs is not None and 'parameterObjects' not in parameters:
        parameters['parameterObjects'] = param_objs
    if param_vals is not None and 'parameterValues' not in parameters:
        parameters['parameterValues'] = param_vals
def test_df_to_api_with_parameter_objects_missing(self):
    """A definition without 'parameters' translates to None, not an error."""
    definition = self.load_def(
        """{"objects": [ { "id" : "S3ToS3Copy", "type" : "CopyActivity", "schedule" : { "ref" : "CopyPeriod" }, "input" : { "ref" : "InputData" }, "output" : { "ref" : "OutputData" } } ]}""")
    try:
        result = translator.definition_to_api_parameters(definition)
    except PipelineDefinitionError:
        self.fail("No exception should be thrown in absence of objects")
    else:
        self.assertIsNone(
            result,
            "Absence of parameter objects should not display anything")
def test_df_to_api_with_parameter_objects(self):
    """A scalar parameter translates to key/stringValue attribute pairs."""
    definition = self.load_def(
        """{"parameters": [ { "id":"myOccurrenceNumber", "type":"Integer", "description":"Number of pipeline runs", "max":"24" } ]}""")
    expected = [{
        "id": "myOccurrenceNumber",
        "attributes": [
            {"key": "description", "stringValue": "Number of pipeline runs"},
            {"key": "max", "stringValue": "24"},
            {"key": "type", "stringValue": "Integer"},
        ],
    }]
    self.assertEqual(
        translator.definition_to_api_parameters(definition), expected)
def run_aws_data_pipeline(self):
    """Run the job through AWS Data Pipeline: stage files to S3, push
    secrets, create a pipeline, upload the translated JSON definition,
    and activate it.
    """
    self.s3_ops(self.session)
    # TODO: fix privileges to get creds in dev env
    self.push_secrets(creds_or_file=self.app_args['connection_file'])

    # DataPipeline ops
    import awscli.customizations.datapipeline.translator as trans
    client = self.session.client('datapipeline')
    pipe_id = self.create_date_pipeline(client)

    # see syntax in datapipeline-dg.pdf p285
    # to add in there: /*"AdditionalMasterSecurityGroups": "#{}", /* To add later to match EMR mode */
    definition_file = eu.LOCAL_APP_FOLDER + 'core/definition.json'

    # Note: Data Pipeline doesn't support emr-6.0.0 yet.
    # Fix: use a context manager so the file handle is always closed
    # (the previous json.load(open(...)) leaked the handle).
    with open(definition_file, 'r') as handle:
        definition = json.load(handle)

    pipelineObjects = trans.definition_to_api_objects(definition)
    parameterObjects = trans.definition_to_api_parameters(definition)
    parameterValues = trans.definition_to_parameter_values(definition)
    parameterValues = self.update_params(parameterValues)
    logger.info('Filled pipeline with data from ' + definition_file)

    response = client.put_pipeline_definition(
        pipelineId=pipe_id,
        pipelineObjects=pipelineObjects,
        parameterObjects=parameterObjects,
        parameterValues=parameterValues)
    logger.info('put_pipeline_definition response: ' + str(response))

    response = client.activate_pipeline(
        pipelineId=pipe_id,
        # optional. If set, need to specify all params as per json.
        parameterValues=parameterValues,
        # startTimestamp=datetime(2018, 12, 1)  # optional
    )
    logger.info('activate_pipeline response: ' + str(response))
    logger.info('Activated pipeline ' + pipe_id)
def add_to_params(self, parameters, value):
    """Translate the JSON definition in *value* and store the resulting
    parameter objects under 'parameterObjects'; no-op when value is None."""
    if value is None:
        return
    definition = json.loads(value)
    parameters['parameterObjects'] = \
        translator.definition_to_api_parameters(definition)