def test_missing_id_field(self):
    """An object definition without an 'id' key must be rejected."""
    # 'id' is deliberately absent from the single object below.
    pipeline_def = self.load_def("""{ "objects": [
        {
          "name": "OVERRIDE-NAME",
          "type": "Schedule",
          "startDateTime": "2013-08-18T00:00:00",
          "endDateTime": "2013-08-19T00:00:00",
          "period": "1 day"
        }
    ]}""")
    with self.assertRaises(translator.PipelineDefinitionError):
        translator.definition_to_api_objects(pipeline_def)
def define_data_pipeline(self, client, pipe_id, emr_core_instances):
    """Load a JSON pipeline definition and push it to an existing pipeline.

    :param client: boto3 'datapipeline' client.
    :param pipe_id: id of the already-created pipeline to fill.
    :param emr_core_instances: number of EMR core instances; 0 selects the
        standalone-cluster definition file instead of the EMR one.
    :return: the parameter values that were sent (after ``update_params``).
    """
    import awscli.customizations.datapipeline.translator as trans
    base = self.get_package_path()
    if emr_core_instances != 0:
        # see syntax in datapipeline-dg.pdf p285
        # to add in there: /*"AdditionalMasterSecurityGroups": "#{}", /* To add later to match EMR mode */
        definition_file = base + 'yaetos/definition.json'
    else:
        # TODO: have 1 json for both to avoid having to track duplication.
        definition_file = base + 'yaetos/definition_standalone_cluster.json'

    # Note: Data Pipeline doesn't support emr-6.0.0 yet.
    # Fixed: original used json.load(open(...)) and never closed the file.
    with open(definition_file, 'r') as fh:
        definition = json.load(fh)

    pipelineObjects = trans.definition_to_api_objects(definition)
    parameterObjects = trans.definition_to_api_parameters(definition)
    parameterValues = trans.definition_to_parameter_values(definition)
    parameterValues = self.update_params(parameterValues)
    logger.info('Filled pipeline with data from ' + definition_file)

    response = client.put_pipeline_definition(
        pipelineId=pipe_id,
        pipelineObjects=pipelineObjects,
        parameterObjects=parameterObjects,
        parameterValues=parameterValues)
    logger.info('put_pipeline_definition response: ' + str(response))
    return parameterValues
def deploy(config_file='config.yaml'):
    """Create a pipeline from a YAML config and upload its full definition.

    Reads credentials, pipeline metadata and the paths of the definition
    JSON files from ``config_file`` (resolved relative to the CWD), then
    issues create_pipeline followed by put_pipeline_definition.
    """
    cfg = read_cfg(os.path.join(os.getcwd(), config_file))

    client = get_client(
        cfg.get('aws_access_key_id'),
        cfg.get('aws_secret_access_key'),
        cfg.get('region'),
        cfg.get('aws_profile'))

    created = client.create_pipeline(
        cfg.get('name'), cfg.get('unique_id'), cfg.get('description', ''))
    pipeline_id = created.get('pipelineId')

    param_objects = translator.definition_to_api_parameters(
        read_json_file(cfg.get('parameter_objects')))
    param_values = translator.definition_to_parameter_values(
        read_json_file(cfg.get('parameter_values')))
    pipeline_def = translator.definition_to_api_objects(
        read_json_file(cfg.get('pipeline_definition')))

    return client.put_pipeline_definition(
        pipeline_id, pipeline_def, param_objects, param_values)
def set_pipeline_definition(self):
    """Translates the json definition and puts it on created pipeline

    Returns:
        dict: the response of the Boto3 command
    """
    # Lazily resolve the pipeline id if it was not fetched yet.
    if not self.pipeline_id:
        self.get_pipeline_id()

    json_def = self.datapipeline_data['json_definition']
    try:
        objects = translator.definition_to_api_objects(json_def)
        params = translator.definition_to_api_parameters(json_def)
        values = translator.definition_to_parameter_values(json_def)
    except translator.PipelineDefinitionError as error:
        # Surface translation failures as our own error type.
        LOG.warning(error)
        raise DataPipelineDefinitionError

    response = self.client.put_pipeline_definition(
        pipelineId=self.pipeline_id,
        pipelineObjects=objects,
        parameterObjects=params,
        parameterValues=values)

    LOG.debug(response)
    LOG.info("Successfully applied pipeline definition")
    return response
def test_value_with_refs(self):
    """List values may mix literal strings and {"ref": ...} entries."""
    pipeline_def = self.load_def("""{"objects": [
        {
          "id" : "emrActivity",
          "type" : "EmrActivity",
          "name" : "Foo",
          "step" : ["s3://foo1", {"ref": "otherValue"}, "s3://foo3"]
        }
    ]}""")
    converted = translator.definition_to_api_objects(pipeline_def)
    expected_fields = [
        {"key": "step", "stringValue": "s3://foo1"},
        {"key": "step", "refValue": "otherValue"},
        {"key": "step", "stringValue": "s3://foo3"},
        {"key": "type", "stringValue": "EmrActivity"},
    ]
    self.assertEqual(converted, [{"name": "Foo", "id": "emrActivity",
                                  "fields": expected_fields}])
def to_new_definition_objects(new_definition, new_parameter_values):
    """Build the put_pipeline_definition kwargs from a parsed definition.

    If ``new_parameter_values`` is given, it is injected under the
    definition's 'values' key before translation.
    """
    if new_parameter_values is not None:
        new_definition['values'] = new_parameter_values
    objects = translator.definition_to_api_objects(new_definition)
    params = translator.definition_to_api_parameters(new_definition)
    values = translator.definition_to_parameter_values(new_definition)
    return {
        'pipelineObjects': objects,
        'parameterObjects': params,
        'parameterValues': values,
    }
def add_to_params(self, parameters, value):
    """Parse a JSON pipeline definition and merge it into ``parameters``."""
    if value is None:
        return
    parsed = json.loads(value)
    # Translate everything first so a translation error mutates nothing.
    api_objects = translator.definition_to_api_objects(parsed)
    param_objects = translator.definition_to_api_parameters(parsed)
    param_values = translator.definition_to_parameter_values(parsed)

    parameters["pipelineObjects"] = api_objects
    # Use Parameter objects and values from def if not already provided
    if param_objects is not None and "parameterObjects" not in parameters:
        parameters["parameterObjects"] = param_objects
    if param_values is not None and "parameterValues" not in parameters:
        parameters["parameterValues"] = param_values
def add_to_params(self, parameters, value):
    """Merge a JSON pipeline definition string into ``parameters``."""
    if value is None:
        return
    definition = json.loads(value)
    translated = {
        'pipelineObjects': translator.definition_to_api_objects(definition),
        'parameterObjects':
            translator.definition_to_api_parameters(definition),
        'parameterValues':
            translator.definition_to_parameter_values(definition),
    }
    parameters['pipelineObjects'] = translated['pipelineObjects']
    # Use Parameter objects and values from def if not already provided
    for key in ('parameterObjects', 'parameterValues'):
        if translated[key] is not None and key not in parameters:
            parameters[key] = translated[key]
def test_value_with_refs(self):
    """A list attribute may contain both plain strings and ref objects."""
    loaded = self.load_def("""{"objects": [
        {
          "id" : "emrActivity",
          "type" : "EmrActivity",
          "name" : "Foo",
          "step" : ["s3://foo1", {"ref": "otherValue"}, "s3://foo3"]
        }
    ]}""")
    result = translator.definition_to_api_objects(loaded)
    self.assertEqual(result, [{
        "name": "Foo",
        "id": "emrActivity",
        "fields": [
            {"key": "step", "stringValue": "s3://foo1"},
            {"key": "step", "refValue": "otherValue"},
            {"key": "step", "stringValue": "s3://foo3"},
            {"key": "type", "stringValue": "EmrActivity"},
        ],
    }])
def test_convert_schedule_df_to_api(self):
    """{"ref": ...} attributes become refValue fields; name defaults to id."""
    loaded = self.load_def("""{"objects": [
        {
          "id" : "S3ToS3Copy",
          "type" : "CopyActivity",
          "schedule" : { "ref" : "CopyPeriod" },
          "input" : { "ref" : "InputData" },
          "output" : { "ref" : "OutputData" }
        }
    ]}""")
    result = translator.definition_to_api_objects(loaded)
    fields = [
        {"key": "input", "refValue": "InputData"},
        {"key": "output", "refValue": "OutputData"},
        {"key": "schedule", "refValue": "CopyPeriod"},
        {"key": "type", "stringValue": "CopyActivity"},
    ]
    self.assertEqual(result, [{"name": "S3ToS3Copy", "id": "S3ToS3Copy",
                               "fields": fields}])
def test_convert_df_to_api_schedule(self):
    """Scalar attributes become stringValue fields keyed by attribute name."""
    loaded = self.load_def("""{ "objects": [
        {
          "id": "MySchedule",
          "type": "Schedule",
          "startDateTime": "2013-08-18T00:00:00",
          "endDateTime": "2013-08-19T00:00:00",
          "period": "1 day"
        }
    ]}""")
    result = translator.definition_to_api_objects(loaded)
    self.assertEqual(result, [{
        "name": "MySchedule",
        "id": "MySchedule",
        "fields": [
            {"key": "endDateTime", "stringValue": "2013-08-19T00:00:00"},
            {"key": "period", "stringValue": "1 day"},
            {"key": "startDateTime", "stringValue": "2013-08-18T00:00:00"},
            {"key": "type", "stringValue": "Schedule"},
        ],
    }])
def test_convert_df_to_api_with_name(self):
    """An explicit 'name' attribute overrides the default (the object id)."""
    loaded = self.load_def("""{ "objects": [
        {
          "id": "MySchedule",
          "name": "OVERRIDE-NAME",
          "type": "Schedule",
          "startDateTime": "2013-08-18T00:00:00",
          "endDateTime": "2013-08-19T00:00:00",
          "period": "1 day"
        }
    ]}""")
    result = translator.definition_to_api_objects(loaded)
    expected_fields = [
        {"key": "endDateTime", "stringValue": "2013-08-19T00:00:00"},
        {"key": "period", "stringValue": "1 day"},
        {"key": "startDateTime", "stringValue": "2013-08-18T00:00:00"},
        {"key": "type", "stringValue": "Schedule"},
    ]
    self.assertEqual(result, [{"name": "OVERRIDE-NAME", "id": "MySchedule",
                               "fields": expected_fields}])
def run_aws_data_pipeline(self):
    """Create, define and activate an AWS Data Pipeline for this job.

    Pushes code/secrets to S3, fills a freshly created pipeline with the
    objects from the bundled JSON definition, then activates it.
    """
    self.s3_ops(self.session)
    # TODO: fix privileges to get creds in dev env
    self.push_secrets(creds_or_file=self.app_args['connection_file'])

    # DataPipeline ops
    import awscli.customizations.datapipeline.translator as trans
    client = self.session.client('datapipeline')
    pipe_id = self.create_date_pipeline(client)

    # see syntax in datapipeline-dg.pdf p285
    # to add in there: /*"AdditionalMasterSecurityGroups": "#{}", /* To add later to match EMR mode */
    definition_file = eu.LOCAL_APP_FOLDER + 'core/definition.json'
    # Note: Data Pipeline doesn't support emr-6.0.0 yet.
    # Fixed: original used json.load(open(...)) and never closed the file.
    with open(definition_file, 'r') as fh:
        definition = json.load(fh)

    pipelineObjects = trans.definition_to_api_objects(definition)
    parameterObjects = trans.definition_to_api_parameters(definition)
    parameterValues = trans.definition_to_parameter_values(definition)
    parameterValues = self.update_params(parameterValues)
    logger.info('Filled pipeline with data from ' + definition_file)

    response = client.put_pipeline_definition(
        pipelineId=pipe_id,
        pipelineObjects=pipelineObjects,
        parameterObjects=parameterObjects,
        parameterValues=parameterValues)
    logger.info('put_pipeline_definition response: ' + str(response))

    response = client.activate_pipeline(
        pipelineId=pipe_id,
        # optional. If set, need to specify all params as per json.
        parameterValues=parameterValues,
        # startTimestamp=datetime(2018, 12, 1)  # optional
    )
    logger.info('activate_pipeline response: ' + str(response))
    logger.info('Activated pipeline ' + pipe_id)
def test_convert_schedule_df_to_api(self):
    """Each {"ref": ...} attribute maps to a refValue field entry."""
    definition = self.load_def("""{"objects": [
        {
          "id" : "S3ToS3Copy",
          "type" : "CopyActivity",
          "schedule" : { "ref" : "CopyPeriod" },
          "input" : { "ref" : "InputData" },
          "output" : { "ref" : "OutputData" }
        }
    ]}""")
    converted = translator.definition_to_api_objects(definition)
    expected = [{
        "name": "S3ToS3Copy",
        "id": "S3ToS3Copy",
        "fields": [
            {"key": "input", "refValue": "InputData"},
            {"key": "output", "refValue": "OutputData"},
            {"key": "schedule", "refValue": "CopyPeriod"},
            {"key": "type", "stringValue": "CopyActivity"},
        ],
    }]
    self.assertEqual(converted, expected)
def test_objects_key_is_missing_raise_error(self):
    """A definition without a top-level 'objects' key must be rejected."""
    bad_def = self.load_def("""{"not-objects": []}""")
    with self.assertRaises(translator.PipelineDefinitionError):
        translator.definition_to_api_objects(bad_def)