def get_job(self, namespace_name, job_name):
    """Fetch a single job identified by its namespace and job name.

    Both names are passed through ``Utils.check_name_length`` before any
    request is made.

    :param namespace_name: namespace the job belongs to
    :param job_name: name of the job
    :return: the result of ``self._get`` on the URL built from the
        ``/namespaces/{0}/jobs/{1}`` template
    """
    for value, label in ((namespace_name, 'namespace_name'),
                         (job_name, 'job_name')):
        Utils.check_name_length(value, label)

    job_url = self._url('/namespaces/{0}/jobs/{1}', namespace_name, job_name)
    return self._get(job_url)
def create_job_run(self, namespace_name, job_name, run_id=None,
                   nominal_start_time=None, nominal_end_time=None,
                   run_args=None, mark_as_running=False):
    """Create a run for a job and optionally mark it as started.

    Only the optional values that are truthy are included in the request
    payload.

    :param namespace_name: namespace the job belongs to
    :param job_name: name of the job to create a run for
    :param run_id: optional caller-chosen run ID, sent as ``id``
    :param nominal_start_time: optional, sent as ``nominalStartTime``
    :param nominal_end_time: optional, sent as ``nominalEndTime``
    :param run_args: optional run arguments, sent as ``args``
    :param mark_as_running: when true, the run is passed to
        ``self.mark_job_run_as_started`` after creation and that call's
        result is returned instead of the creation response
    :return: the creation response, or the mark-as-started response when
        ``mark_as_running`` is true
    """
    Utils.check_name_length(namespace_name, 'namespace_name')
    Utils.check_name_length(job_name, 'job_name')

    payload = {}
    if run_id:
        payload['id'] = run_id
    if nominal_start_time:
        payload['nominalStartTime'] = nominal_start_time
    if nominal_end_time:
        payload['nominalEndTime'] = nominal_end_time
    if run_args:
        payload['args'] = run_args

    response = self._post(
        self._url('/namespaces/{0}/jobs/{1}/runs', namespace_name, job_name),
        payload=payload)

    if mark_as_running:
        # When the caller did not supply a run ID there is nothing useful to
        # pass on, so fall back to the ID reported by the creation response.
        # NOTE(review): assumes the response is a dict carrying the run ID
        # under 'id' -- confirm against the Marquez API. If it is not, the
        # original argument is passed through unchanged.
        started_run_id = run_id
        if not started_run_id and isinstance(response, dict):
            started_run_id = response.get('id', run_id)
        response = self.mark_job_run_as_started(started_run_id)

    return response
def create_job(self, namespace_name, job_name, job_type, location=None,
               input_dataset=None, output_dataset=None, description=None,
               context=None):
    """Create or update a job via a PUT request.

    The namespace and job names are length-checked and ``job_type`` must
    pass ``Utils.is_instance_of(job_type, JobType)``.

    :param namespace_name: namespace the job belongs to
    :param job_name: name of the job
    :param job_type: a ``JobType``; its ``name`` is sent as ``type``
    :param location: optional, sent as ``location`` when truthy
    :param input_dataset: sent as ``inputs``; an empty list when falsy
    :param output_dataset: sent as ``outputs``; an empty list when falsy
    :param description: optional, sent as ``description`` when truthy
    :param context: optional, sent as ``context`` when truthy
    :return: the result of ``self._put``
    """
    Utils.check_name_length(namespace_name, 'namespace_name')
    Utils.check_name_length(job_name, 'job_name')
    Utils.is_instance_of(job_type, JobType)

    payload = {}
    payload['inputs'] = input_dataset or []
    payload['outputs'] = output_dataset or []
    payload['type'] = job_type.name

    # Optional attributes are only sent when the caller supplied a value.
    optional_fields = (
        ('context', context),
        ('location', location),
        ('description', description),
    )
    for key, value in optional_fields:
        if value:
            payload[key] = value

    job_url = self._url('/namespaces/{0}/jobs/{1}', namespace_name, job_name)
    return self._put(job_url, payload=payload)
def get_dataset(self, namespace_name, dataset_name):
    """Fetch a single dataset identified by namespace and dataset name.

    Both names are length-checked with ``Utils.check_name_length`` first.

    :param namespace_name: namespace the dataset belongs to
    :param dataset_name: name of the dataset
    :return: the result of ``self._get`` on the URL built from the
        ``/namespaces/{0}/datasets/{1}`` template
    """
    Utils.check_name_length(namespace_name, 'namespace_name')
    Utils.check_name_length(dataset_name, 'dataset_name')

    dataset_url = self._url(
        '/namespaces/{0}/datasets/{1}', namespace_name, dataset_name)
    return self._get(dataset_url)
def create_job(self, namespace_name, job_name, job_type, location=None,
               inputs: [DatasetId] = None, outputs: [DatasetId] = None,
               description=None, context=None, run_id=None):
    """Create or update a job via a PUT request.

    The namespace and job names are length-checked and ``job_type`` must
    pass ``Utils.is_instance_of(job_type, JobType)``.

    :param namespace_name: namespace the job belongs to
    :param job_name: name of the job
    :param job_type: a ``JobType``; its ``value`` is sent as ``type``
    :param location: optional, sent as ``location`` when truthy
    :param inputs: optional dataset IDs; each element's ``__dict__`` is
        sent under ``inputs`` (empty list when falsy)
    :param outputs: optional dataset IDs; each element's ``__dict__`` is
        sent under ``outputs`` (empty list when falsy)
    :param description: optional, sent as ``description`` when truthy
    :param context: optional, sent as ``context`` when truthy
    :param run_id: optional, sent as ``runId`` when truthy
    :return: the result of ``self._put``
    """
    Utils.check_name_length(namespace_name, 'namespace_name')
    Utils.check_name_length(job_name, 'job_name')
    Utils.is_instance_of(job_type, JobType)

    payload = {'type': job_type.value}
    if inputs:
        payload['inputs'] = [dataset_id.__dict__ for dataset_id in inputs]
    else:
        payload['inputs'] = []
    if outputs:
        payload['outputs'] = [dataset_id.__dict__ for dataset_id in outputs]
    else:
        payload['outputs'] = []

    # Optional attributes are only sent when the caller supplied a value.
    optional_fields = (
        ('runId', run_id),
        ('context', context),
        ('location', location),
        ('description', description),
    )
    for key, value in optional_fields:
        if value:
            payload[key] = value

    job_url = self._url('/namespaces/{0}/jobs/{1}', namespace_name, job_name)
    return self._put(job_url, payload=payload)
def list_datasets(self, namespace_name, limit=None, offset=None):
    """List the datasets of a namespace.

    :param namespace_name: namespace to list datasets for (length-checked)
    :param limit: value for the ``limit`` query parameter;
        ``DEFAULT_LIMIT`` is used when falsy
    :param offset: value for the ``offset`` query parameter;
        ``DEFAULT_OFFSET`` is used when falsy
    :return: the result of ``self._get``
    """
    Utils.check_name_length(namespace_name, 'namespace_name')

    datasets_url = self._url('/namespaces/{0}/datasets', namespace_name)
    paging = {
        'limit': limit or DEFAULT_LIMIT,
        'offset': offset or DEFAULT_OFFSET,
    }
    return self._get(datasets_url, params=paging)
def list_jobs(self, namespace_name, limit=None, offset=None):
    """List the jobs of a namespace.

    Paging falls back to ``DEFAULT_LIMIT`` / ``DEFAULT_OFFSET`` when the
    caller passes nothing, matching ``list_datasets`` and
    ``list_job_runs`` instead of sending ``None`` for both parameters.

    :param namespace_name: namespace to list jobs for (length-checked)
    :param limit: value for the ``limit`` query parameter;
        ``DEFAULT_LIMIT`` is used when falsy
    :param offset: value for the ``offset`` query parameter;
        ``DEFAULT_OFFSET`` is used when falsy
    :return: the result of ``self._get``
    """
    Utils.check_name_length(namespace_name, 'namespace_name')

    return self._get(
        self._url('/namespaces/{0}/jobs', namespace_name),
        params={
            'limit': limit or DEFAULT_LIMIT,
            'offset': offset or DEFAULT_OFFSET,
        })
def tag_dataset(self, namespace_name, dataset_name, tag_name):
    """Attach a tag to a dataset via a POST request.

    :param namespace_name: namespace the dataset belongs to (length-checked)
    :param dataset_name: name of the dataset (length-checked)
    :param tag_name: tag to apply; must be truthy
    :return: the result of ``self._post``
    :raises ValueError: if ``tag_name`` is falsy
    """
    Utils.check_name_length(namespace_name, 'namespace_name')
    Utils.check_name_length(dataset_name, 'dataset_name')

    if tag_name:
        tag_url = self._url(
            '/namespaces/{0}/datasets/{1}/tags/{2}',
            namespace_name, dataset_name, tag_name)
        return self._post(tag_url)

    raise ValueError('tag_name must not be None')
def list_job_runs(self, namespace_name, job_name, limit=None, offset=None):
    """List the runs of a job.

    :param namespace_name: namespace the job belongs to (length-checked)
    :param job_name: name of the job (length-checked)
    :param limit: value for the ``limit`` query parameter;
        ``DEFAULT_LIMIT`` is used when falsy
    :param offset: value for the ``offset`` query parameter;
        ``DEFAULT_OFFSET`` is used when falsy
    :return: the result of ``self._get``
    """
    Utils.check_name_length(namespace_name, 'namespace_name')
    Utils.check_name_length(job_name, 'job_name')

    runs_url = self._url(
        '/namespaces/{0}/jobs/{1}/runs', namespace_name, job_name)
    paging = {
        'limit': limit or DEFAULT_LIMIT,
        'offset': offset or DEFAULT_OFFSET,
    }
    return self._get(runs_url, params=paging)
def get_dataset_version(self, namespace_name, dataset_name, version):
    """Fetch one specific version of a dataset.

    :param namespace_name: namespace the dataset belongs to (length-checked)
    :param dataset_name: name of the dataset (length-checked)
    :param version: version identifier; must be truthy
    :return: the result of ``self._get``
    :raises ValueError: if ``version`` is falsy
    """
    Utils.check_name_length(namespace_name, 'namespace_name')
    Utils.check_name_length(dataset_name, 'dataset_name')

    if version:
        version_url = self._url(
            '/namespaces/{0}/datasets/{1}/versions/{2}',
            namespace_name, dataset_name, version)
        return self._get(version_url)

    raise ValueError('version must not be None')
def create_namespace(self, namespace_name, owner_name, description=None):
    """Create or update a namespace via a PUT request.

    :param namespace_name: name of the namespace (length-checked)
    :param owner_name: owner of the namespace (length-checked), sent as
        ``ownerName``
    :param description: optional, sent as ``description`` when truthy
    :return: the result of ``self._put``
    """
    for value, label in ((namespace_name, 'namespace_name'),
                         (owner_name, 'owner_name')):
        Utils.check_name_length(value, label)

    payload = dict(ownerName=owner_name)
    if description:
        payload.update(description=description)

    namespace_url = self._url('/namespaces/{0}', namespace_name)
    return self._put(namespace_url, payload=payload)
def create_source(self, source_name, source_type, connection_url,
                  description=None):
    """Create or update a source via a PUT request.

    :param source_name: name of the source (length-checked)
    :param source_type: source type; upper-cased and sent as ``type``
    :param connection_url: connection URL, checked with
        ``Utils.is_valid_connection_url`` and sent as ``connectionUrl``
    :param description: optional, sent as ``description`` when truthy
    :return: the result of ``self._put``
    """
    Utils.check_name_length(source_name, 'source_name')
    Utils.is_valid_connection_url(connection_url)

    payload = {}
    payload['type'] = source_type.upper()
    payload['connectionUrl'] = connection_url
    if description:
        payload['description'] = description

    source_url = self._url('/sources/{0}', source_name)
    return self._put(source_url, payload=payload)
def tag_dataset_field(self, namespace_name, dataset_name, field_name,
                      tag_name):
    """Attach a tag to one field of a dataset via a POST request.

    All four names are length-checked with ``Utils.check_name_length``,
    in the order of the parameters, before the request is made.

    :param namespace_name: namespace the dataset belongs to
    :param dataset_name: name of the dataset
    :param field_name: name of the field to tag
    :param tag_name: tag to apply
    :return: the result of ``self._post``
    """
    checked_names = (
        (namespace_name, 'namespace_name'),
        (dataset_name, 'dataset_name'),
        (field_name, 'field_name'),
        (tag_name, 'tag_name'),
    )
    for value, label in checked_names:
        Utils.check_name_length(value, label)

    field_tag_url = self._url(
        '/namespaces/{0}/datasets/{1}/fields/{2}/tags/{3}',
        namespace_name, dataset_name, field_name, tag_name)
    return self._post(field_tag_url)
def create_dataset(self, namespace_name, dataset_name, dataset_type,
                   physical_name, source_name, description=None,
                   run_id=None, schema_location=None, fields=None,
                   tags=None):
    """Create or update a dataset via a PUT request.

    :param namespace_name: namespace the dataset belongs to (length-checked)
    :param dataset_name: name of the dataset (length-checked)
    :param dataset_type: a ``DatasetType``; its ``value`` is sent as
        ``type``
    :param physical_name: sent as ``physicalName`` (length-checked)
    :param source_name: sent as ``sourceName`` (length-checked)
    :param description: optional, sent as ``description`` when truthy
    :param run_id: optional, sent as ``runId`` when truthy
    :param schema_location: optional, sent as ``schemaLocation`` when
        truthy; checked with ``MarquezClient._is_none`` for
        ``DatasetType.STREAM`` datasets
    :param fields: optional, sent as ``fields`` when truthy
    :param tags: optional, sent as ``tags`` when truthy
    :return: the result of ``self._put``
    """
    Utils.check_name_length(namespace_name, 'namespace_name')
    Utils.check_name_length(dataset_name, 'dataset_name')
    Utils.is_instance_of(dataset_type, DatasetType)

    # NOTE(review): presumably this rejects a missing schema_location for
    # stream datasets -- confirm against MarquezClient._is_none.
    if dataset_type == DatasetType.STREAM:
        MarquezClient._is_none(schema_location, 'schema_location')

    Utils.check_name_length(physical_name, 'physical_name')
    Utils.check_name_length(source_name, 'source_name')

    payload = {}
    payload['type'] = dataset_type.value
    payload['physicalName'] = physical_name
    payload['sourceName'] = source_name

    # Optional attributes are only sent when the caller supplied a value.
    optional_fields = (
        ('description', description),
        ('runId', run_id),
        ('fields', fields),
        ('tags', tags),
        ('schemaLocation', schema_location),
    )
    for key, value in optional_fields:
        if value:
            payload[key] = value

    dataset_url = self._url(
        '/namespaces/{0}/datasets/{1}', namespace_name, dataset_name)
    return self._put(dataset_url, payload=payload)
def test_check_name_length():
    """Check that over-long names are rejected with ``ValueError``.

    Each case pairs a variable name with the length of an all-``'a'``
    string that ``Utils.check_name_length`` is expected to refuse.
    """
    # NOTE(review): 65 and 256 are presumably one past the allowed maximum
    # for each kind of name -- confirm against Utils.check_name_length.
    rejected_cases = (
        ('namespace_name', 65),
        ('owner_name', 65),
        ('source_name', 65),
        ('dataset_name', 256),
        ('field_name', 256),
        ('job_name', 256),
        ('tag_name', 256),
    )
    for name, length in rejected_cases:
        with pytest.raises(ValueError):
            Utils.check_name_length(variable_value='a' * length,
                                    variable_name=name)
def get_source(self, source_name):
    """Fetch a single source by name.

    :param source_name: name of the source (length-checked)
    :return: the result of ``self._get`` on the URL built from the
        ``/sources/{0}`` template
    """
    Utils.check_name_length(source_name, 'source_name')

    source_url = self._url('/sources/{0}', source_name)
    return self._get(source_url)
def get_namespace(self, namespace_name):
    """Fetch a single namespace by name.

    :param namespace_name: name of the namespace (length-checked)
    :return: the result of ``self._get`` on the URL built from the
        ``/namespaces/{0}`` template
    """
    Utils.check_name_length(namespace_name, 'namespace_name')

    namespace_url = self._url('/namespaces/{0}', namespace_name)
    return self._get(namespace_url)