示例#1
0
 def test_workflow_search(self):
     """
     test gbdx.workflow.search(lookback_h=<hours>, state=<state>, owner=<owner>)

     Searches the last 12 hours in all states and expects at least one hit.
     """
     wf = Workflow()
     output = wf.search(lookback_h=12, state='all')
     # BUG FIX: assertTrue(len(output), 0) passed 0 as the failure *message*,
     # so the test passed whenever len(output) was truthy and asserted
     # nothing about the intended comparison. Assert a non-empty result.
     self.assertTrue(len(output) > 0)
示例#2
0
 def test_workflow_search(self):
     """
     test gbdx.workflow.search(lookback_h=<hours>, state=<state>, owner=<owner>)

     Expects the 12-hour lookback across all states to return results.
     """
     wf = Workflow()
     output = wf.search(lookback_h=12, state='all')
     # BUG FIX: the original assertTrue(len(output), 0) treated 0 as the
     # msg argument and never performed a real comparison; explicitly
     # require a non-empty search result instead.
     self.assertTrue(len(output) > 0)
示例#3
0
def test_describe_tasks():
    """List registered tasks and verify the first one can be described."""
    workflow_client = Workflow(gbdx)
    task_listing = workflow_client.list_tasks()
    assert len(task_listing) > 0
    first_task = task_listing['tasks'][0]
    description = workflow_client.describe_task(first_task)
    assert isinstance(description, dict)
    assert len(description['description']) > 0
示例#4
0
def test_describe_tasks():
    """describe_task must return a dict with a non-empty description."""
    wf_client = Workflow(gbdx)
    info = wf_client.list_tasks()
    assert len(info) > 0
    detail = wf_client.describe_task(info['tasks'][0])
    assert isinstance(detail, dict)
    assert len(detail['description']) > 0
示例#5
0
    def test_batch_workflows(self):
        """
        tests all 3 endpoints for batch workflows, create, fetch, and cancel
        :return:
        """
        workflow_client = Workflow()

        # Load the canned batch-workflow definition from the test data dir.
        json_path = os.path.join(self.data_path, "batch_workflow.json")
        with open(json_path) as json_file:
            self.batch_workflow_json = json.loads(json_file.read())

        # create
        batch_id = workflow_client.launch_batch_workflow(self.batch_workflow_json)

        # status: the fetched record must echo the id we just launched
        status = workflow_client.batch_workflow_status(batch_id)
        self.assertEqual(batch_id, status.get("batch_workflow_id"))

        # cancel: every sub-workflow should be stopping or stopped
        status = workflow_client.batch_workflow_cancel(batch_id)
        for sub_workflow in status.get('workflows'):
            self.assertTrue(sub_workflow.get('state') in ["canceling", "canceled"])
示例#6
0
 def test_describe_tasks(self):
     """Verify list_tasks yields tasks and describe_task returns a description."""
     workflow_client = Workflow(self.gbdx)
     info = workflow_client.list_tasks()
     self.assertTrue(len(info) > 0)
     detail = workflow_client.describe_task(info['tasks'][0])
     self.assertTrue(isinstance(detail, dict))
     self.assertTrue(len(detail['description']) > 0)
    def test_task_get_stdout(self):
        """
        test gbdx.workflows.get_stdout(<workflow_id>,<task_id>)
        """
        workflow_client = Workflow(self.gbdx)
        stdout_text = workflow_client.get_stdout('4488969848362445219',
                                                 '4488969848354891944')
        self.assertTrue(len(stdout_text) > 0)
示例#8
0
    def test_task_get_stderr(self):
        """
        test gbdx.workflows.get_stderr(<workflow_id>,<task_id>)

        A task that produced no stderr is reported as the literal '<empty>'.
        """
        wf = Workflow()

        output = wf.get_stderr('4488969848362445219', '4488969848354891944')

        # FIX: assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual. The docstring also mistakenly said get_stdout.
        self.assertEqual('<empty>', output)
示例#9
0
    def test_task_get_stdout(self):
        """
        test gbdx.workflows.get_stdout(<workflow_id>,<task_id>)
        """
        workflow_api = Workflow()

        captured = workflow_api.get_stdout('4488969848362445219', '4488969848354891944')

        self.assertTrue(len(captured) > 0)
示例#10
0
    def test_task_get_stderr(self):
        """
        test gbdx.workflows.get_stderr(<workflow_id>,<task_id>)

        A task with no stderr output is reported as the literal '<empty>'.
        (Docstring previously said get_stdout by mistake.)
        """
        wf = Workflow()

        output = wf.get_stderr('4488969848362445219', '4488969848354891944')

        self.assertEqual('<empty>', output)
示例#11
0
    def test_workflow_get(self):
        """
        test gbdx.workflows.get(<workflow_id>)
        """
        workflow_client = Workflow(self.gbdx)

        record = workflow_client.get('4488969848362445219')

        # The workflow record must carry every top-level field.
        expected_keys = ('id', 'owner', 'submitted_time', 'state',
                         'callback', 'tasks')
        for key in expected_keys:
            self.assertTrue(key in record.keys())
示例#12
0
    def test_workflow_get(self):
        """
        test gbdx.workflows.get(<workflow_id>)
        """
        workflow_client = Workflow()

        result = workflow_client.get('4488969848362445219')

        result_keys = result.keys()
        self.assertTrue('id' in result_keys)
        self.assertTrue('owner' in result_keys)
        self.assertTrue('submitted_time' in result_keys)
        self.assertTrue('state' in result_keys)
        self.assertTrue('callback' in result_keys)
        self.assertTrue('tasks' in result_keys)
示例#13
0
def test_workflow_events():
    """Fetch events for a known workflow id and validate their structure."""
    workflow_client = Workflow(gbdx)
    workflow_id = '4347109104758907277'
    events = workflow_client.events(workflow_id)
    assert len(events) > 0
    assert isinstance(events, list)
    required_fields = ('task', 'state', 'event', 'timestamp', 'when', 'note')
    valid_states = ['pending','running','complete']
    valid_events = ['submitted','scheduled','rescheduling','started','succeeded','failed','timedout']
    for event in events:
        for field in required_fields:
            assert field in event.keys()
        assert event['state'] in valid_states
        assert event['event'] in valid_events
示例#14
0
 def test_workflow_events(self):
     """Validate the event stream returned for a known workflow id."""
     workflow_client = Workflow(self.gbdx)
     event_list = workflow_client.events('4347109104758907277')
     assert len(event_list) > 0
     assert isinstance(event_list, list)
     for entry in event_list:
         assert 'task' in entry.keys()
         assert 'state' in entry.keys()
         assert 'event' in entry.keys()
         assert 'timestamp' in entry.keys()
         assert 'when' in entry.keys()
         assert 'note' in entry.keys()
         assert entry['state'] in ['pending','running','complete']
         assert entry['event'] in ['submitted','scheduled','rescheduling','started','succeeded','failed','timedout']
    def test_workflow_callback_is_retrieved_in_workflow_status(self):
        """
        Verify a callback URL passed at construction is stored with the
        workflow and comes back when the workflow record is fetched.
        """
        aoptask = self.gbdx.Task("AOP_Strip_Processor", data='testing')
        callback_url = 'http://requestb.in/qg8wzqqg'

        # launch a workflow and verify it launches:
        launched = self.gbdx.Workflow([aoptask], callback=callback_url)
        launched.execute()

        api = WorkflowAPI(self.gbdx)
        fetched = api.get(launched.id)
        assert fetched['callback'] == callback_url
示例#16
0
    def test_workflow_callback_is_retrieved_in_workflow_status(self):
        """
        Verify we can set task timeouts, it appears in the json, and launching a workflow works
        """
        strip_task = self.gbdx.Task("AOP_Strip_Processor", data='testing')
        callback_url = 'http://requestb.in/qg8wzqqg'

        # Launch a workflow carrying the callback and confirm it round-trips.
        launched = self.gbdx.Workflow([strip_task], callback=callback_url)
        launched.execute()

        workflow_api = WorkflowAPI()
        body = workflow_api.get(launched.id)
        assert body['callback'] == callback_url
示例#17
0
    def __init__(self, **kwargs):
        """
        Build the top-level GBDX interface: authenticate via ``Auth`` and
        attach one client object per GBDX service.

        Kwargs are forwarded unchanged to ``Auth`` (credential/config
        selection happens there).
        """
        interface = Auth(**kwargs)
        # Share the authenticated session, API root, and logger from Auth.
        self.gbdx_connection = interface.gbdx_connection
        self.root_url = interface.root_url
        self.logger = interface.logger

        # create and store an instance of the GBDX s3 client
        self.s3 = S3()

        # create and store an instance of the GBDX Ordering Client
        self.ordering = Ordering()

        # create and store an instance of the GBDX Catalog Client
        self.catalog = Catalog()

        # create and store an instance of the GBDX Workflow Client
        self.workflow = Workflow()

        # create and store an instance of the Idaho Client
        self.idaho = Idaho()

        # create and store an instance of the GBDX Vectors client
        self.vectors = Vectors()

        # Image types are exposed as classes (note: not instantiated here).
        self.catalog_image = CatalogImage
        self.idaho_image = IdahoImage

        # client for registering and managing GBDX tasks
        self.task_registry = TaskRegistry()
示例#18
0
    def __init__(self, **kwargs):
        """
        Build the top-level GBDX interface: authenticate via ``Auth`` and
        attach one client object per GBDX service plus the image classes.

        Kwargs are forwarded unchanged to ``Auth`` (credential/config
        selection happens there).
        """
        interface = Auth(**kwargs)
        # Share the authenticated session, API root, and logger from Auth.
        self.gbdx_connection = interface.gbdx_connection
        self.root_url = interface.root_url
        self.logger = interface.logger

        # create and store an instance of the GBDX s3 client
        self.s3 = S3()

        # create and store an instance of the GBDX Ordering Client
        self.ordering = Ordering()

        # create and store an instance of the GBDX Catalog Client
        self.catalog = Catalog()

        # create and store an instance of the GBDX Workflow Client
        self.workflow = Workflow()

        # create and store an instance of the Idaho Client
        self.idaho = Idaho()

        # create and store an instance of the GBDX Vectors client
        self.vectors = Vectors()

        # Image types are exposed as classes (note: not instantiated here),
        # one per supported sensor/source.
        self.catalog_image = CatalogImage
        self.idaho_image = IdahoImage
        self.landsat_image = LandsatImage
        self.sentinel2 = Sentinel2
        self.tms_image = TmsImage
        self.dem_image = DemImage
        self.wv03_vnir = WV03_VNIR
        self.wv02 = WV02
        self.ge01 = GE01
        self.s3_image = S3Image

        # client for registering and managing GBDX tasks
        self.task_registry = TaskRegistry()
示例#19
0
    def test_batch_workflows(self):
        """
        tests all 3 endpoints for batch workflows, create, fetch, and cancel
        :return:
        """
        workflow_client = Workflow(self.gbdx)

        # Load the canned batch-workflow definition from the test data dir.
        batch_json_path = os.path.join(self.data_path, "batch_workflow.json")
        with open(batch_json_path) as json_file:
            self.batch_workflow_json = json.loads(json_file.read())

        # create
        batch_id = workflow_client.launch_batch_workflow(self.batch_workflow_json)

        # fetch status and confirm the id round-trips
        status = workflow_client.batch_workflow_status(batch_id)
        self.assertEqual(batch_id, status.get("batch_workflow_id"))

        # cancel and confirm every sub-workflow is stopping or stopped
        status = workflow_client.batch_workflow_cancel(batch_id)
        for sub_workflow in status.get('workflows'):
            self.assertTrue(sub_workflow.get('state') in ["canceling", "canceled"])
示例#20
0
    def __init__(self, **kwargs):
        """
        Establish an authenticated GBDX session and attach service clients.

        Kwargs:
            host: API host name; defaults to 'geobigdata.io'.
            username, password, client_id, client_secret: full credential
                set; when all four are present a session is built from them.
            gbdx_connection: pre-built session object (testing hook).
            config_file: path to a gbdx .ini file, used as the fallback.
        """
        host = kwargs.get('host') if kwargs.get('host') else 'geobigdata.io'
        self.root_url = 'https://%s' % host

        # Credential precedence: explicit kwargs > injected session > .ini file.
        if (kwargs.get('username') and kwargs.get('password')
                and kwargs.get('client_id') and kwargs.get('client_secret')):
            self.gbdx_connection = gbdx_auth.session_from_kwargs(**kwargs)
        elif kwargs.get('gbdx_connection'):
            # Pass in a custom gbdx connection object, for testing purposes
            self.gbdx_connection = kwargs.get('gbdx_connection')
        else:
            # This will throw an exception if your .ini file is not set properly
            self.gbdx_connection = gbdx_auth.get_session(
                kwargs.get('config_file'))

        # create a logger
        # for now, just log to the console. We'll replace all the 'print' statements
        # with at least logger.info or logger.debug statements
        # later, we can log to a service, file, or some other aggregator
        self.logger = logging.getLogger('gbdxtools')
        self.logger.setLevel(logging.ERROR)
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.ERROR)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        console_handler.setFormatter(formatter)
        self.logger.addHandler(console_handler)
        self.logger.info('Logger initialized')

        # create and store an instance of the GBDX s3 client
        self.s3 = S3(self)

        # create and store an instance of the GBDX Ordering Client
        self.ordering = Ordering(self)

        # create and store an instance of the GBDX Catalog Client
        self.catalog = Catalog(self)

        # create and store an instance of the GBDX Workflow Client
        self.workflow = Workflow(self)

        # create and store an instance of the Idaho Client
        self.idaho = Idaho(self)

        # create and store an instance of the GBDX Vectors client
        self.vectors = Vectors(self)

        # client for registering and managing GBDX tasks
        self.task_registry = TaskRegistry(self)
示例#21
0
    def __init__(self, tasks, **kwargs):
        """
        Assemble a workflow around *tasks*.

        Kwargs:
            name (optional): workflow name; defaults to a random 8-char id.
            callback (optional): callback URL recorded for launch time.
        """
        self.workflow = WF()
        self.name = kwargs.get('name', str(uuid.uuid4())[:8])
        self.id = None
        self.callback = kwargs.get('callback', None)

        self.definition = None
        self.tasks = tasks

        # Collect batch values from every task; store None when no task
        # carries any, so launch logic can branch on truthiness.
        collected = [value
                     for task in self.tasks if task.batch_values
                     for value in task.batch_values]
        self.batch_values = collected if collected else None
示例#22
0
 def test_init(self):
     """A freshly built Workflow client exposes s3 and a live connection."""
     workflow_client = Workflow(self.gbdx)
     self.assertTrue(isinstance(workflow_client, Workflow))
     self.assertTrue(workflow_client.s3 is not None)
     self.assertTrue(workflow_client.gbdx_connection is not None)
示例#23
0
def test_list_tasks():
    """The task registry listing must include the HelloGBDX sample task."""
    workflow_client = Workflow(gbdx)
    listing = workflow_client.list_tasks()
    assert listing is not None
    assert 'HelloGBDX' in listing['tasks']
示例#24
0
def test_init():
    """A freshly constructed Workflow exposes s3 and a connection."""
    workflow_client = Workflow(gbdx)
    assert isinstance(workflow_client, Workflow)
    assert workflow_client.s3 is not None
    assert workflow_client.gbdx_connection is not None
示例#25
0
class Workflow(object):
    """
    A client-side workflow: a list of task objects plus the state needed
    to launch and monitor them against the GBDX workflow API.

    All HTTP work is delegated to ``self.workflow`` (a ``WF`` API client).
    ``self.id`` stays None until ``execute()`` is called.  When any task
    carries batch values the workflow is launched through the batch
    endpoints, and several accessors (events, stdout, stderr, task_ids,
    failed/canceled/timedout) are intentionally unsupported for batches.
    """

    def __init__(self, tasks, **kwargs):
        """
        Args:
            tasks: list of task objects forming the workflow graph.

        Kwargs:
            name (optional): workflow name; defaults to a random 8-char id.
            callback (optional): callback URL added to the definition.
        """
        self.workflow = WF()
        self.name = kwargs.get('name', str(uuid.uuid4())[:8] )
        self.id = None
        self.callback = kwargs.get('callback', None )

        self.definition = None

        self.tasks = tasks

        # Gather batch values from all tasks; a non-empty list switches
        # execute() (and status/complete/running) to the batch endpoints.
        batch_values = []

        for task in self.tasks:
            if task.batch_values:
                batch_values.extend(task.batch_values)

        if batch_values:
            self.batch_values = batch_values
        else:
            self.batch_values = None

    def savedata(self, output, location=None):
        '''
        Save output data from any task in this workflow to S3

        Args:
               output: Reference task output (e.g. task.inputs.output1).

               location (optional): Subfolder under which the output will be saved.
                                    It will be placed under the account directory in gbd-customer-data bucket:
                                    s3://gbd-customer-data/{account_id}/{location}
                                    Leave blank to save to: workflow_output/{workflow_id}/{task_name}/{port_name}

        Returns:
            None
        '''

        output.persist = True
        if location:
            output.persist_location = location

    def workflow_skeleton(self):
        '''Return the minimal workflow definition dict (name set, no tasks yet).'''
        return {
            "tasks": [],
            "name": self.name
        }

    def list_workflow_outputs(self):
        '''
        Get a list of outputs from the workflow that are saved to S3. To get resolved locations call workflow status.
        Args:
            None

        Returns:
            list
        '''
        workflow_outputs = []
        for task in self.tasks:
            for output_port_name in task.outputs._portnames:
                if task.outputs.__getattribute__(output_port_name).persist:
                    workflow_outputs.append(task.name + ':' + output_port_name)

        return workflow_outputs

    def generate_workflow_description(self):
        '''
        Generate workflow json for launching the workflow against the gbdx api

        Args:
            None

        Returns:
            json string
        '''
        if not self.tasks:
            raise WorkflowError('Workflow contains no tasks, and cannot be executed.')

        self.definition = self.workflow_skeleton()

        if self.batch_values:
            self.definition["batch_values"] = self.batch_values

        # Every input port value across all tasks, used below to decide
        # which multiplex output ports are actually referenced.
        all_input_port_values = [t.inputs.__getattribute__(input_port_name).value for t in self.tasks for
                                 input_port_name in t.inputs._portnames]
        for task in self.tasks:
            # only include multiplex output ports in this task if other tasks refer to them in their inputs.
            # 1. find the multplex output port_names in this task
            # 2. see if they are referred to in any other tasks inputs
            # 3. If not, exclude them from the workflow_def
            output_multiplex_ports_to_exclude = []
            multiplex_output_port_names = [portname for portname in task.outputs._portnames if
                                           task.outputs.__getattribute__(portname).is_multiplex]
            for p in multiplex_output_port_names:
                output_port_reference = 'source:' + task.name + ':' + p
                if output_port_reference not in all_input_port_values:
                    output_multiplex_ports_to_exclude.append(p)

            task_def = task.generate_task_workflow_json(
                output_multiplex_ports_to_exclude=output_multiplex_ports_to_exclude)
            self.definition['tasks'].append(task_def)

        if self.callback:
            self.definition['callback'] = self.callback

        return self.definition

    def execute(self):
        '''
        Execute the workflow.

        Args:
            None

        Returns:
            Workflow_id
        '''
        # if not self.tasks:
        #     raise WorkflowError('Workflow contains no tasks, and cannot be executed.')

        # for task in self.tasks:
        #     self.definition['tasks'].append( task.generate_task_workflow_json() )

        self.generate_workflow_description()

        # hit batch workflow endpoint if batch values
        if self.batch_values:
            self.id = self.workflow.launch_batch_workflow(self.definition)

        # use regular workflow endpoint if no batch values
        else:
            self.id = self.workflow.launch(self.definition)

        return self.id

    @property
    def task_ids(self):
        '''
        Get the task IDs of a running workflow

        Args:
            None

        Returns:
            List of task IDs
        '''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot get task IDs.')

        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for task IDs.")

        wf = self.workflow.get(self.id)

        return [task['id'] for task in wf['tasks']]

    @task_ids.setter
    def task_ids(self, value):
        raise NotImplementedError("Cannot set workflow task_ids, readonly.")


    def cancel(self):
        '''
        Cancel a running workflow.

        Args:
            None

        Returns:
            None
        '''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot cancel.')

        if self.batch_values:
            self.workflow.batch_workflow_cancel(self.id)
        else:
            self.workflow.cancel(self.id)

    @property
    def status(self):
        '''Current status dict from the API (batch status when batch_values is set).'''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot check status.')

        if self.batch_values:
            status = self.workflow.batch_workflow_status(self.id)
        else:
            status = self.workflow.status(self.id)

        return status

    @status.setter
    def status(self, value):
        raise NotImplementedError("Cannot set workflow status, readonly.")

    @property
    def events(self):
        '''List of task events for this workflow; unsupported for batch workflows.'''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot check status.')
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for Events")
        return self.workflow.events(self.id)

    @events.setter
    def events(self, value):
        raise NotImplementedError("Cannot set workflow events, readonly.")

    @property
    def complete(self):
        '''True when the workflow (or every batch sub-workflow) has finished.'''
        if not self.id:
            return False

        # check if all sub workflows are either done, failed, or timedout
        if self.batch_values:
            return all(workflow.get("state") in ["succeeded", "failed", "timedout"] for workflow in
                       self.status['workflows'])
        else:
            return self.status['state'] == 'complete'

    @complete.setter
    def complete(self, value):
        raise NotImplementedError("Cannot set workflow complete, readonly.")

    @property
    def failed(self):
        '''True when the workflow completed with a failed event; unsupported for batch.'''
        if not self.id:
            return False
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for Current State")
        status = self.status
        return status['state'] == 'complete' and status['event'] == 'failed'

    @failed.setter
    def failed(self, value):
        raise NotImplementedError("Cannot set workflow failed, readonly.")

    @property
    def canceled(self):
        '''True when the workflow completed with a canceled event; unsupported for batch.'''
        if not self.id:
            return False
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for Current State")
        status = self.status
        return status['state'] == 'complete' and status['event'] == 'canceled'

    @canceled.setter
    def canceled(self, value):
        raise NotImplementedError("Cannot set workflow canceled, readonly.")

    @property
    def succeeded(self):
        '''True when the workflow (or every batch sub-workflow) succeeded.'''
        if not self.id:
            return False

        # check if all sub workflows are succeeded
        if self.batch_values:
            return all(workflow.get("state") == "succeeded" for workflow in self.status['workflows'])

        status = self.status
        return status['state'] == 'complete' and status['event'] == 'succeeded'

    @succeeded.setter
    def succeeded(self, value):
        raise NotImplementedError("Cannot set workflow succeeded, readonly.")

    @property
    def running(self):
        '''True while the workflow (or any batch sub-workflow) is still in flight.'''
        if not self.id:
            return False
        if self.batch_values:
            # check if any sub workflows are running
            return any(workflow.get("state") not in ["succeeded", "failed", "timedout"] for workflow in
                       self.status['workflows'])
        status = self.status
        return status['state'] == 'running' and status['event'] == 'started'

    @running.setter
    def running(self, value):
        raise NotImplementedError("Cannot set workflow running, readonly.")

    @property
    def timedout(self):
        '''True when the workflow completed with a timedout event; unsupported for batch.'''
        if not self.id:
            return False
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for Current State")
        status = self.status
        return status['state'] == 'complete' and status['event'] == 'timedout'

    @timedout.setter
    def timedout(self, value):
        raise NotImplementedError("Cannot set workflow timedout, readonly.")

    @property
    def stdout(self):
        '''
        Get stdout from all the tasks of a workflow.

        Args:
            None

        Returns:
            List of tasks with their stdout, formatted like this:
            [
                {
                    "id": "4488895771403082552",
                    "taskType": "AOP_Strip_Processor",
                    "name": "Task1",
                    "stdout": "............"
                }
            ]
        '''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot get stdout.')
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for stdout.")

        wf = self.workflow.get(self.id)

        stdout_list = []
        for task in wf['tasks']:
            stdout_list.append(
                {
                    'id': task['id'],
                    'taskType': task['taskType'],
                    'name': task['name'],
                    'stdout': self.workflow.get_stdout(self.id, task['id'])
                }
            )

        return stdout_list

    @stdout.setter
    def stdout(self, value):
        raise NotImplementedError("Cannot set workflow stdout, readonly.")

    @property
    def stderr(self):
        '''
        Get stderr from all the tasks of a workflow.

        Args:
            None

        Returns:
            List of tasks with their stderr, formatted like this:
            [
                {
                    "id": "4488895771403082552",
                    "taskType": "AOP_Strip_Processor",
                    "name": "Task1",
                    "stderr": "............"
                }
            ]
        '''
        if not self.id:
            raise WorkflowError('Workflow is not running.  Cannot get stderr.')
        if self.batch_values:
            raise NotImplementedError("Query Each Workflow Id within the Batch Workflow for stderr.")

        wf = self.workflow.get(self.id)

        stderr_list = []
        for task in wf['tasks']:
            stderr_list.append(
                {
                    'id': task['id'],
                    'taskType': task['taskType'],
                    'name': task['name'],
                    'stderr': self.workflow.get_stderr(self.id, task['id'])
                }
            )

        return stderr_list

    @stderr.setter
    def stderr(self, value):
        raise NotImplementedError("Cannot set workflow stderr, readonly.")
示例#26
0
 def test_list_tasks(self):
     """list_tasks must return data that includes the HelloGBDX task."""
     workflow_client = Workflow(self.gbdx)
     listing = workflow_client.list_tasks()
     self.assertTrue(listing is not None)
     self.assertTrue('HelloGBDX' in listing['tasks'])
示例#27
0
def test_list_tasks():
    """HelloGBDX is expected among the registered tasks."""
    task_info = Workflow(gbdx).list_tasks()
    assert task_info is not None
    assert 'HelloGBDX' in task_info['tasks']