示例#1
0
 def test_workflow_schema(self):
     """Validating a workflow built with state=None must change its dict form."""
     workflow = Workflow(
         data=WorkflowData('test-workflow', 'ABC123', state=None))
     snapshot = workflow.to_dict()
     validated = default_and_validate(workflow, workflow_schema())
     # per the original author's note the missing state should be defaulted
     # to INACTIVE, so the validated dict can no longer equal the snapshot
     self.assertNotEqual(snapshot, validated.to_dict())
示例#2
0
 def test_workflow_schema(self):
     """Check that default_and_validate alters a workflow whose state is None."""
     unset_state = None
     data = WorkflowData('test-workflow', 'ABC123', state=unset_state)
     original = Workflow(data=data)
     before = original.to_dict()
     # state should be defaulted to INACTIVE, which makes the dicts differ
     after = default_and_validate(original, workflow_schema()).to_dict()
     self.assertNotEqual(before, after)
    def setUp(self):
        """Save the datastore/workflow/action/trigger fixtures on a local dart.

        Creates two TEMPLATE no_op_engine datastores, one ACTIVE workflow per
        datastore, two TEMPLATE succeeding actions per workflow, a
        'workflow_completion' trigger whose args name workflow0, and a 'super'
        trigger on workflow1 that lists the first trigger's id under
        'completed_trigger_ids'.
        """
        # :type self.dart: dart.client.python.dart_client.Dart
        self.dart = Dart(host='localhost', port=5000)

        # the same args dict is shared by both datastores
        sleep_args = {'action_sleep_time_in_seconds': 0}
        datastore_data0 = DatastoreData(
            'test-datastore0',
            'no_op_engine',
            args=sleep_args,
            state=DatastoreState.TEMPLATE)
        self.datastore0 = self.dart.save_datastore(
            Datastore(data=datastore_data0))
        datastore_data1 = DatastoreData(
            'test-datastore1',
            'no_op_engine',
            args=sleep_args,
            state=DatastoreState.TEMPLATE)
        self.datastore1 = self.dart.save_datastore(
            Datastore(data=datastore_data1))

        workflow_data0 = WorkflowData(
            'test-workflow0', self.datastore0.id, state=WorkflowState.ACTIVE)
        self.workflow0 = self.dart.save_workflow(
            Workflow(data=workflow_data0), self.datastore0.id)
        workflow_data1 = WorkflowData(
            'test-workflow1', self.datastore1.id, state=WorkflowState.ACTIVE)
        self.workflow1 = self.dart.save_workflow(
            Workflow(data=workflow_data1), self.datastore1.id)

        succeeds = NoOpActionTypes.action_that_succeeds.name

        def template_action():
            # every action in this fixture is the same succeeding template
            return Action(data=ActionData(
                succeeds, succeeds, state=ActionState.TEMPLATE))

        self.action00, self.action01 = self.dart.save_actions(
            [template_action(), template_action()],
            workflow_id=self.workflow0.id)
        self.action10, self.action11 = self.dart.save_actions(
            [template_action(), template_action()],
            workflow_id=self.workflow1.id)

        completion = Trigger(data=TriggerData(
            'test-trigger',
            'workflow_completion',
            None,
            {'completed_workflow_id': self.workflow0.id},
            TriggerState.ACTIVE))
        self.trigger = self.dart.save_trigger(completion)

        chained_args = {
            'fire_after': 'ALL',
            'completed_trigger_ids': [self.trigger.id],
        }
        chained = Trigger(data=TriggerData(
            'test-super-trigger',
            'super',
            [self.workflow1.id],
            chained_args,
            TriggerState.ACTIVE))
        self.super_trigger = self.dart.save_trigger(chained)
示例#4
0
def put_workflow(workflow):
    """Update the workflow's state from the JSON request body.

    Only the ``data.state`` of the posted workflow is used.

    :type workflow: dart.model.workflow.Workflow
    :return: ``{'results': <workflow dict>}`` on success, or an
        ``(error body, 400, None)`` tuple when the requested state is neither
        ACTIVE nor INACTIVE
    """
    requested_state = Workflow.from_dict(request.get_json()).data.state
    allowed_states = (WorkflowState.ACTIVE, WorkflowState.INACTIVE)
    if requested_state in allowed_states:
        saved = workflow_service().update_workflow_state(workflow, requested_state)
        return {'results': saved.to_dict()}
    return {'results': 'ERROR', 'error_message': 'state must be ACTIVE or INACTIVE'}, 400, None
示例#5
0
 def setUp(self):
     """Save a TEMPLATE no_op_engine datastore and an ACTIVE workflow on it."""
     self.dart = Dart(host='localhost', port=5000)
     datastore_data = DatastoreData(
         'test-datastore',
         'no_op_engine',
         args={'action_sleep_time_in_seconds': 0},
         state=DatastoreState.TEMPLATE)
     self.datastore = self.dart.save_datastore(Datastore(data=datastore_data))
     workflow_data = WorkflowData(
         'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
     self.workflow = self.dart.save_workflow(
         Workflow(data=workflow_data), self.datastore.id)
示例#6
0
    def test_crud(self):
        """Create, read, update and delete a workflow through the dart client."""
        # create
        new_wf = Workflow(data=WorkflowData(
            name='test-workflow',
            datastore_id=self.datastore.id,
            engine_name='no_op_engine'))
        created = self.dart.save_workflow(new_wf, self.datastore.id)
        self.assertEqual(created.data.to_dict(), new_wf.data.to_dict())

        # read
        fetched = self.dart.get_workflow(created.id)
        self.assertEqual(created.to_dict(), fetched.to_dict())

        # update
        fetched.data.concurrency = 2
        fetched.data.state = WorkflowState.ACTIVE
        updated = self.dart.save_workflow(fetched)
        self.assertEqual(updated.data.concurrency, 2)
        self.assertEqual(updated.data.state, WorkflowState.ACTIVE)
        self.assertNotEqual(created.to_dict(), updated.to_dict())

        # delete: a subsequent read is expected to fail with a 404
        self.dart.delete_workflow(fetched.id)
        try:
            self.dart.get_workflow(fetched.id)
        except DartRequestException as error:
            self.assertEqual(error.response.status_code, 404)
        else:
            self.fail('workflow should have been missing after delete!')
示例#7
0
    def test_workflow_schema_invalid(self):
        """A workflow whose name is None must fail schema validation."""
        with self.assertRaises(DartValidationException) as raised:
            nameless = Workflow(data=WorkflowData(None, 'ABC123'))
            # should fail because the name is missing
            default_and_validate(nameless, workflow_schema())

        self.assertTrue(isinstance(raised.exception, DartValidationException))
示例#8
0
def post_workflow(datastore):
    """Save the workflow in the JSON request body under the given datastore.

    The workflow's datastore_id and engine_name are overwritten from the
    datastore, and concurrency is forced to 1 when the datastore is ACTIVE.

    :type datastore: dart.model.datastore.Datastore
    :return: ``{'results': <saved workflow dict>}``
    """
    posted = Workflow.from_dict(request.get_json())
    wf_data = posted.data
    wf_data.datastore_id = datastore.id
    wf_data.engine_name = datastore.data.engine_name
    # only templated datastores can use concurrencies > 1
    if datastore.data.state == DatastoreState.ACTIVE:
        wf_data.concurrency = 1
    saved = workflow_service().save_workflow(posted)
    return {'results': saved.to_dict()}
示例#9
0
def post_workflow(datastore):
    """Create a workflow from the JSON request body, bound to ``datastore``.

    datastore_id and engine_name always come from the datastore; an ACTIVE
    datastore additionally pins the workflow's concurrency to 1.

    :type datastore: dart.model.datastore.Datastore
    :return: ``{'results': <saved workflow dict>}``
    """
    incoming = Workflow.from_dict(request.get_json())
    details = incoming.data
    details.datastore_id = datastore.id
    details.engine_name = datastore.data.engine_name
    datastore_is_active = datastore.data.state == DatastoreState.ACTIVE
    if datastore_is_active:
        # only templated datastores can use concurrencies > 1
        details.concurrency = 1
    persisted = workflow_service().save_workflow(incoming)
    return {'results': persisted.to_dict()}
示例#10
0
文件: resolve.py 项目: chrisborg/dart
 def _resolve_and_save_workflow(self, entity_id, entity_map, actual_entities_by_node_id, actual_entities_by_unsaved_id):
     """Return the id of the workflow for ``entity_id``, saving it if needed.

     When ``self._resolve`` yields a truthy actual id, that id is returned
     unchanged. Otherwise the workflow is built from
     ``entity_map['unsaved_entities']``, its datastore is resolved first, the
     workflow is saved with ``commit=False, flush=True`` and recorded in both
     lookup dicts.
     """
     resolved_id, pending_id = self._resolve(
         EntityType.workflow, entity_id, entity_map, actual_entities_by_unsaved_id)
     if resolved_id:
         return resolved_id

     key = self._node_id(EntityType.workflow, pending_id)
     unsaved = Workflow.from_dict(entity_map['unsaved_entities'][key])
     assert isinstance(unsaved, Workflow)
     # the referenced datastore is resolved before the workflow itself is saved
     unsaved.data.datastore_id = self._resolve_and_save_datastore(
         unsaved.data.datastore_id,
         entity_map,
         actual_entities_by_node_id,
         actual_entities_by_unsaved_id)
     saved = self._workflow_service.save_workflow(unsaved, commit=False, flush=True)
     actual_entities_by_node_id[key] = saved
     actual_entities_by_unsaved_id[pending_id] = saved
     return saved.id
示例#11
0
文件: resolve.py 项目: ophiradi/dart
 def _resolve_and_save_workflow(self, entity_id, entity_map, actual_entities_by_node_id, actual_entities_by_unsaved_id):
     """Resolve ``entity_id`` to a saved workflow id, creating the workflow if required.

     A truthy actual id from ``self._resolve`` short-circuits and is returned
     as is. Otherwise the unsaved workflow is loaded from ``entity_map``, its
     datastore_id is replaced by the resolved datastore id, and the workflow
     is saved (``commit=False, flush=True``) and stored in both
     ``actual_entities_by_*`` dicts before its id is returned.
     """
     existing_id, unsaved_id = self._resolve(
         EntityType.workflow,
         entity_id,
         entity_map,
         actual_entities_by_unsaved_id)
     if existing_id:
         return existing_id

     node_key = self._node_id(EntityType.workflow, unsaved_id)
     raw_workflow = entity_map['unsaved_entities'][node_key]
     candidate = Workflow.from_dict(raw_workflow)
     assert isinstance(candidate, Workflow)

     datastore_id = self._resolve_and_save_datastore(
         candidate.data.datastore_id,
         entity_map,
         actual_entities_by_node_id,
         actual_entities_by_unsaved_id)
     candidate.data.datastore_id = datastore_id

     stored = self._workflow_service.save_workflow(candidate, commit=False, flush=True)
     actual_entities_by_node_id[node_key] = stored
     actual_entities_by_unsaved_id[unsaved_id] = stored
     return stored.id
示例#12
0
def put_workflow(workflow):
    """Set the workflow's state to the one given in the JSON request body.

    :type workflow: dart.model.workflow.Workflow
    :return: ``{'results': <workflow dict>}`` on success; an
        ``(error body, 400, None)`` tuple when the requested state is not
        ACTIVE or INACTIVE
    """
    new_state = Workflow.from_dict(request.get_json()).data.state
    settable_states = (WorkflowState.ACTIVE, WorkflowState.INACTIVE)
    if new_state in settable_states:
        service = workflow_service()
        refreshed = service.update_workflow_state(workflow, new_state)
        return {'results': refreshed.to_dict()}

    error_body = {
        'results': 'ERROR',
        'error_message': 'state must be ACTIVE or INACTIVE',
    }
    return error_body, 400, None
示例#13
0
 def setUp(self):
     """Save a TEMPLATE no_op_engine datastore and a workflow attached to it."""
     self.dart = Dart(host='localhost', port=5000)
     datastore_data = DatastoreData(
         name='test-datastore',
         engine_name='no_op_engine',
         args={'action_sleep_time_in_seconds': 0},
         state=DatastoreState.TEMPLATE)
     self.datastore = self.dart.save_datastore(Datastore(data=datastore_data))
     workflow_data = WorkflowData(
         name='test-workflow', datastore_id=self.datastore.id)
     self.workflow = self.dart.save_workflow(
         workflow=Workflow(data=workflow_data),
         datastore_id=self.datastore.id)
     # raise unittest's diff-size cap so long assertion diffs are not truncated
     self.maxDiff = 99999
示例#14
0
    def setUp(self):
        """Save a dataset, subscription, datastore, workflow and two actions.

        The dataset and subscription locations are built from the
        DART_TEST_BUCKET environment variable (KeyError if it is unset). The
        workflow gets one succeeding action and one consume_subscription
        action that references the saved subscription.
        """
        # :type self.dart: dart.client.python.dart_client.Dart
        self.dart = Dart(host='localhost', port=5000)

        columns = [
            Column('c1', DataType.VARCHAR, 50),
            Column('c2', DataType.BIGINT),
        ]
        data_format = DataFormat(FileFormat.TEXTFILE, RowFormat.DELIMITED)
        s3_root = 's3://' + os.environ['DART_TEST_BUCKET'] + '/impala'
        dataset = Dataset(data=DatasetData(
            name='test-dataset',
            table_name='test_dataset_table',
            load_type=LoadType.INSERT,
            location=s3_root,
            data_format=data_format,
            columns=columns,
            tags=[]))
        self.dataset = self.dart.save_dataset(dataset)

        # positional values after the dataset id were originally named
        # start, end and regex
        subscription = Subscription(data=SubscriptionData(
            'test-subscription',
            self.dataset.id,
            s3_root + '/impala',
            s3_root + '/install',
            '.*\\.rpm'))
        self.subscription = self.dart.save_subscription(subscription)

        datastore_data = DatastoreData(
            'test-datastore',
            'no_op_engine',
            args={'action_sleep_time_in_seconds': 0},
            state=DatastoreState.TEMPLATE)
        self.datastore = self.dart.save_datastore(Datastore(data=datastore_data))

        workflow_data = WorkflowData(
            'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
        self.workflow = self.dart.save_workflow(
            Workflow(data=workflow_data), self.datastore.id)

        succeeds = NoOpActionTypes.action_that_succeeds.name
        consumes = NoOpActionTypes.consume_subscription.name
        consume_args = {'subscription_id': self.subscription.id}
        pending_actions = [
            Action(data=ActionData(
                succeeds, succeeds, state=ActionState.TEMPLATE)),
            Action(data=ActionData(
                consumes, consumes, consume_args, state=ActionState.TEMPLATE)),
        ]
        self.action0, self.action1 = self.dart.save_actions(
            pending_actions, workflow_id=self.workflow.id)
示例#15
0
    def setUp(self):
        """Save an ACTIVE datastore, an ACTIVE workflow and one failing action."""
        # :type self.dart: dart.client.python.dart_client.Dart
        self.dart = Dart(host='localhost', port=5000)

        datastore_data = DatastoreData(
            'test-datastore',
            'no_op_engine',
            args={'action_sleep_time_in_seconds': 0},
            state=DatastoreState.ACTIVE)
        self.datastore = self.dart.save_datastore(Datastore(data=datastore_data))

        workflow_data = WorkflowData(
            'test-workflow', self.datastore.id, state=WorkflowState.ACTIVE)
        self.workflow = self.dart.save_workflow(
            Workflow(data=workflow_data), self.datastore.id)

        # the workflow's only action is the no-op type that fails
        failing = NoOpActionTypes.action_that_fails.name
        failing_action = Action(data=ActionData(
            failing, failing, state=ActionState.TEMPLATE))
        self.dart.save_actions([failing_action], workflow_id=self.workflow.id)
示例#16
0
def add_no_op_engine_sub_graphs(config):
    """Save the 'workflow chaining demo' subgraph definition for no_op_engine.

    Connects to dart using the engine's options, looks up the engine named
    'no_op_engine' and saves one subgraph definition under it. The definition
    holds two ACTIVE workflows, seven TEMPLATE actions (four on workflow 1,
    three on workflow 2, the last of which is ``action_that_fails``) and one
    ACTIVE workflow_completion trigger that targets workflow 2 and names
    workflow 1 as its ``completed_workflow_id``.

    :param config: dict; ``config['engines']['no_op_engine']['options']`` must
        provide ``dart_host``, ``dart_port`` and ``dart_api_version``
    :raises RuntimeError: if ``dart.get_engines()`` yields no engine named
        'no_op_engine'
    """
    engine_name = 'no_op_engine'
    opts = config['engines'][engine_name]['options']
    dart = Dart(opts['dart_host'], opts['dart_port'], opts['dart_api_version'])
    assert isinstance(dart, Dart)

    _logger.info('saving no_op_engine sub_graphs')

    engine_id = None
    for engine in dart.get_engines():
        if engine.data.name == engine_name:
            engine_id = engine.id
    if not engine_id:
        # A bare ``raise`` here had no active exception to re-raise, so it
        # failed with an unrelated error; report the actual problem instead.
        raise RuntimeError(
            'engine not found, cannot save sub_graphs: %s' % engine_name)

    def chained_workflow(idx):
        # one ACTIVE workflow of the chain, parented to the related datastore
        return Workflow(id=Ref.workflow(idx),
                        data=WorkflowData(
                            name='no-op-workflow-chaining-wf%s' % idx,
                            datastore_id=Ref.parent(),
                            engine_name=engine_name,
                            state=WorkflowState.ACTIVE,
                        ))

    def template_action(idx, action_type, workflow_idx, order_idx):
        # one TEMPLATE action named after its action type
        return Action(id=Ref.action(idx),
                      data=ActionData(
                          name=action_type.name,
                          action_type_name=action_type.name,
                          engine_name=engine_name,
                          workflow_id=Ref.workflow(workflow_idx),
                          order_idx=order_idx,
                          state=ActionState.TEMPLATE,
                      ))

    succeeds = NoOpActionTypes.action_that_succeeds
    fails = NoOpActionTypes.action_that_fails
    # (action ref, action type, workflow ref, order_idx within the workflow)
    action_specs = [
        (1, succeeds, 1, 1),
        (2, succeeds, 1, 2),
        (3, succeeds, 1, 3),
        (4, succeeds, 1, 4),
        (5, succeeds, 2, 1),
        (6, succeeds, 2, 2),
        (7, fails, 2, 3),
    ]

    subgraph_definitions = [
        SubGraphDefinition(data=SubGraphDefinitionData(
            name='workflow chaining demo',
            description='demonstrate workflow chaining',
            engine_name=engine_name,
            related_type=EntityType.datastore,
            related_is_a=Relationship.PARENT,
            workflows=[chained_workflow(1), chained_workflow(2)],
            actions=[template_action(*spec) for spec in action_specs],
            triggers=[
                Trigger(id=Ref.trigger(1),
                        data=TriggerData(
                            name='no-op-trigger-workflow-completion',
                            trigger_type_name=workflow_completion_trigger.name,
                            workflow_ids=[Ref.workflow(2)],
                            state=TriggerState.ACTIVE,
                            args={'completed_workflow_id': Ref.workflow(1)})),
            ],
        ))
    ]

    for definition in subgraph_definitions:
        saved = dart.save_subgraph_definition(definition, engine_id)
        _logger.info('created subgraph_definition: %s' % saved.id)
示例#17
0
def _get_static_subgraphs_by_related_type(engine, graph_entity_service):
    """Build the engine's static sub graphs, keyed by related entity type.

    For ``EntityType.workflow`` the list holds, in order: one TEMPLATE action
    sub graph per supported action type, then the workflow-completion,
    scheduled and super trigger sub graphs. For ``EntityType.datastore`` it
    holds the new-workflow sub graph followed by one HAS_NEVER_RUN action sub
    graph per supported action type.

    :param engine: engine whose ``data.name`` and
        ``data.supported_action_types`` are read
    :param graph_entity_service: provides
        ``to_entity_models_with_randomized_ids``, ``to_graph`` and
        ``to_entity_map``
    :return: dict mapping entity type to a list of SubGraph
    """
    engine_name = engine.data.name
    randomize = graph_entity_service.to_entity_models_with_randomized_ids

    def sub_graph(name, description, related_type, related_is_a, models, icon):
        # wrap already-randomized entity models into a SubGraph
        return SubGraph(
            name=name,
            description=description,
            related_type=related_type,
            related_is_a=related_is_a,
            graph=graph_entity_service.to_graph(None, models),
            entity_map=graph_entity_service.to_entity_map(models),
            icon=icon,
        )

    def action_models(action_type, state, **parent_ref):
        # parent_ref is workflow_id=... or datastore_id=..., depending on the
        # kind of entity the action hangs off
        return randomize([
            Action(id=Ref.action(1),
                   data=ActionData(
                       name=action_type.name,
                       action_type_name=action_type.name,
                       engine_name=engine_name,
                       state=state,
                       args={} if action_type.params_json_schema else None,
                       **parent_ref))
        ])

    def trigger_models(trigger_type, workflow_ids, **extra):
        # extra carries args=... only for the trigger type that needs it
        return randomize([
            Trigger(id=Ref.trigger(1),
                    data=TriggerData(
                        name='%s_trigger' % trigger_type.name,
                        trigger_type_name=trigger_type.name,
                        state=TriggerState.INACTIVE,
                        workflow_ids=workflow_ids,
                        **extra))
        ])

    workflow_sub_graphs = []
    sub_graph_map = {EntityType.workflow: workflow_sub_graphs}

    for action_type in engine.data.supported_action_types:
        models = action_models(
            action_type, ActionState.TEMPLATE, workflow_id=Ref.parent())
        workflow_sub_graphs.append(sub_graph(
            action_type.name, action_type.description,
            EntityType.workflow, Relationship.PARENT, models, '●'))

    models = trigger_models(
        workflow_completion_trigger, [],
        args={'completed_workflow_id': Ref.parent()})
    workflow_sub_graphs.append(sub_graph(
        'workflow completion trigger',
        'create a new workflow_completion trigger entity',
        EntityType.workflow, Relationship.PARENT, models, '▼'))

    models = trigger_models(scheduled_trigger, [Ref.child()])
    workflow_sub_graphs.append(sub_graph(
        'scheduled trigger',
        'create a new scheduled trigger entity',
        EntityType.workflow, Relationship.CHILD, models, '▼'))

    models = trigger_models(super_trigger, [Ref.child()])
    workflow_sub_graphs.append(sub_graph(
        'super trigger',
        'create a new super trigger entity',
        EntityType.workflow, Relationship.CHILD, models, '▼'))

    models = randomize([
        Workflow(id=Ref.workflow(1),
                 data=WorkflowData(name='workflow',
                                   datastore_id=Ref.parent(),
                                   engine_name=engine_name,
                                   state=WorkflowState.INACTIVE))
    ])
    datastore_sub_graphs = [sub_graph(
        'workflow',
        'create a new workflow entity',
        EntityType.datastore, Relationship.PARENT, models, '◆')]
    sub_graph_map[EntityType.datastore] = datastore_sub_graphs

    for action_type in engine.data.supported_action_types:
        models = action_models(
            action_type, ActionState.HAS_NEVER_RUN, datastore_id=Ref.parent())
        datastore_sub_graphs.append(sub_graph(
            action_type.name, action_type.description,
            EntityType.datastore, Relationship.PARENT, models, '●'))

    return sub_graph_map
示例#18
0
    # Save a TEMPLATE datastore on emr_engine; the first two positional values
    # are passed to DatastoreData as-is (presumably name and engine name).
    datastore = dart.save_datastore(Datastore(
        data=DatastoreData(
            'beacon_native_app_impala',
            'emr_engine',
            state=DatastoreState.TEMPLATE,
            args={'data_to_freespace_ratio': 0.25}
        )
    ))
    print 'created datastore: %s' % datastore.id

    # Save an ACTIVE workflow on that datastore with notification addresses
    # for failure, success and start.
    workflow = dart.save_workflow(Workflow(
        data=WorkflowData(
            'load_beacon_native_app_impala',
            datastore.id,
            state=WorkflowState.ACTIVE,
            on_failure_email=['*****@*****.**'],
            on_success_email=['*****@*****.**'],
            on_started_email=['*****@*****.**'],
        )
    ), datastore.id)
    print 'created workflow: %s' % workflow.id

    # Attach two TEMPLATE actions to the workflow: start_datastore, then
    # load_dataset. NOTE(review): `dataset` is defined outside this fragment.
    a0, a1 = dart.save_actions([
        Action(data=ActionData('start_datastore', 'start_datastore', state=ActionState.TEMPLATE)),
        Action(data=ActionData('load_dataset', 'load_dataset', state=ActionState.TEMPLATE, args={
            'dataset_id': dataset.id,
            's3_path_start_prefix_inclusive': 's3://example-bucket/prd/beacon/native_app/v2/parquet/snappy/createdpartition=2015-06-27',
        })),
    ], workflow_id=workflow.id)
    print 'created action: %s' % a0.id
    print 'created action: %s' % a1.id
示例#19
0
                'dataset_id': dataset.id,
                's3_path_end_prefix_exclusive':
                's3://example-bucket/prd/inbound/overlord/eu-all-events/2015/08/05/',
                'target_file_format': FileFormat.PARQUET,
                'target_row_format': RowFormat.NONE,
                'target_compression': Compression.SNAPPY,
            })),
    ],
                               datastore_id=datastore.id)
    print 'created action: %s' % a0.id
    print 'created action: %s' % a1.id

    workflow = dart.save_workflow(workflow=Workflow(data=WorkflowData(
        name='owen_eu_parquet_workflow_DW-3213_v3',
        datastore_id=datastore.id,
        state=WorkflowState.ACTIVE,
        on_failure_email=['*****@*****.**'],
        on_success_email=['*****@*****.**'],
        on_started_email=['*****@*****.**'],
    )),
                                  datastore_id=datastore.id)
    print 'created workflow: %s' % workflow.id

    a2 = dart.save_actions(actions=[
        Action(data=ActionData('consume_subscription',
                               'consume_subscription',
                               state=ActionState.TEMPLATE,
                               args={
                                   'subscription_id': subscription.id,
                                   'target_file_format': FileFormat.PARQUET,
                                   'target_row_format': RowFormat.NONE,
                                   'target_compression': Compression.SNAPPY,
示例#20
0
def put_workflow(workflow):
    """Pass the workflow parsed from the JSON request body to update_workflow.

    :type workflow: dart.model.workflow.Workflow
    :return: whatever ``update_workflow`` returns
    """
    replacement = Workflow.from_dict(request.get_json())
    return update_workflow(workflow, replacement)
示例#21
0
                'dataset_id': '34HWJLF5N9',
                's3_path_end_prefix_exclusive':
                's3://example-bucket/prd/inbound/overlord/raw/rmndirect/2015/08/18/',
                'target_file_format': FileFormat.PARQUET,
                'target_row_format': RowFormat.NONE,
                'target_compression': Compression.SNAPPY,
            })),
    ],
                                datastore_id=datastore.id)
    print 'created action: %s' % actions[0].id
    print 'created action: %s' % actions[1].id

    workflow = dart.save_workflow(workflow=Workflow(data=WorkflowData(
        name='rmn_direct_workflow_DW-3307',
        datastore_id=datastore.id,
        state=WorkflowState.ACTIVE,
        on_failure_email=['*****@*****.**'],
        on_success_email=['*****@*****.**'],
        on_started_email=['*****@*****.**'],
    )),
                                  datastore_id=datastore.id)
    print 'created workflow: %s' % workflow.id

    wf_actions = dart.save_actions(actions=[
        Action(data=ActionData('consume_subscription',
                               'consume_subscription',
                               state=ActionState.TEMPLATE,
                               args={
                                   'subscription_id': subscription.id,
                                   'target_file_format': FileFormat.PARQUET,
                                   'target_row_format': RowFormat.NONE,
                                   'target_compression': Compression.SNAPPY,
示例#22
0
def patch_workflow(workflow):
    """Apply the JSON Patch in the request body to the workflow and update it.

    The patch is applied to ``workflow.to_dict()``; the patched dict is turned
    back into a Workflow and handed to ``update_workflow``.

    :type workflow: dart.model.workflow.Workflow
    :return: whatever ``update_workflow`` returns
    """
    patch = JsonPatch(request.get_json())
    patched_doc = patch.apply(workflow.to_dict())
    return update_workflow(workflow, Workflow.from_dict(patched_doc))
示例#23
0
def put_workflow(workflow):
    """Update ``workflow`` with the workflow document sent as the JSON request body.

    :type workflow: dart.model.workflow.Workflow
    :return: whatever ``update_workflow`` returns
    """
    body = request.get_json()
    submitted = Workflow.from_dict(body)
    return update_workflow(workflow, submitted)
示例#24
0
def patch_workflow(workflow):
    """
    Patch ``workflow`` with the patch document carried in the request body.

    The current workflow is serialized with ``to_dict()``, the patch is
    applied to that dict, and the outcome is rebuilt as a Workflow that is
    given to ``update_workflow`` along with the original.

    :type workflow: dart.model.workflow.Workflow
    """
    patch_document = request.get_json()
    current_state = workflow.to_dict
    updated = Workflow.from_dict(JsonPatch(patch_document).apply(current_state()))
    return update_workflow(workflow, updated)
示例#25
0
    def setUp(self):
        """
        Save the fixture entities through a Dart client pointed at
        localhost:5000 and keep the saved results on ``self``.

        Saved, in this order: three datasets, two subscriptions, three
        datastores, three workflows, the actions for each workflow/datastore,
        two events, two event triggers and two super triggers.

        NOTE(review): entities named ``*-no-show`` are presumably expected to
        be absent from whatever the tests query for -- confirm against the
        test methods.
        """
        dart = Dart(host='localhost', port=5000)
        """ :type dart: dart.client.python.dart_client.Dart """
        self.dart = dart

        # --- datasets -------------------------------------------------------
        # The random id is substituted into the location with % formatting;
        # plain concatenation would leave a literal '%s' inside the S3 path.
        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset0_location = 's3://test/dataset/0/%s' % random_id()
        dataset_data = DatasetData('test-dataset0', 'test_dataset_table0', dataset0_location, df, cs)
        self.dataset0 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        # Kept in a local because the copy_hdfs_to_s3 action below writes to
        # this same location.
        dataset1_location = 's3://test/dataset/1/%s' % random_id()
        dataset_data = DatasetData('test-dataset1', 'test_dataset_table1', dataset1_location, df, cs)
        self.dataset1 = self.dart.save_dataset(Dataset(data=dataset_data))

        cs = [Column('c1', DataType.VARCHAR, 50), Column('c2', DataType.BIGINT)]
        df = DataFormat(FileFormat.PARQUET, RowFormat.NONE)
        dataset2_location = 's3://test/dataset/2/%s' % random_id()
        dataset_data = DatasetData('test-dataset2-no-show', 'test_dataset_table2', dataset2_location, df, cs)
        self.dataset2 = self.dart.save_dataset(Dataset(data=dataset_data))

        # --- subscriptions --------------------------------------------------
        s = Subscription(data=SubscriptionData('test-subscription0', self.dataset0.id))
        self.subscription0 = self.dart.save_subscription(s)

        s = Subscription(data=SubscriptionData('test-subscription2-no-show', self.dataset2.id))
        self.subscription2 = self.dart.save_subscription(s)

        # --- datastores -----------------------------------------------------
        dst_args = {'action_sleep_time_in_seconds': 0}
        dst = Datastore(data=DatastoreData('test-datastore0', 'no_op_engine', args=dst_args, state=DatastoreState.TEMPLATE))
        self.datastore0 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData('test-datastore1', 'no_op_engine', args=dst_args, state=DatastoreState.TEMPLATE))
        self.datastore1 = self.dart.save_datastore(dst)
        dst = Datastore(data=DatastoreData('test-datastore2-no-show', 'no_op_engine', args=dst_args, state=DatastoreState.ACTIVE))
        self.datastore2 = self.dart.save_datastore(dst)

        # --- workflows (one per datastore) ----------------------------------
        wf0 = Workflow(data=WorkflowData('test-workflow0', self.datastore0.id, state=WorkflowState.ACTIVE))
        self.workflow0 = self.dart.save_workflow(wf0, self.datastore0.id)
        wf1 = Workflow(data=WorkflowData('test-workflow1', self.datastore1.id, state=WorkflowState.ACTIVE))
        self.workflow1 = self.dart.save_workflow(wf1, self.datastore1.id)
        wf2 = Workflow(data=WorkflowData('test-workflow2-no-show', self.datastore2.id, state=WorkflowState.ACTIVE))
        self.workflow2 = self.dart.save_workflow(wf2, self.datastore2.id)

        # --- actions of workflow0 -------------------------------------------
        # a01 consumes subscription0 (dataset0); a03 copies into dataset1's
        # location.
        a_args = {'source_hdfs_path': 'hdfs:///user/hive/warehouse/test', 'destination_s3_path': dataset1_location}
        a00 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name, NoOpActionTypes.action_that_succeeds.name, state=ActionState.TEMPLATE))
        a01 = Action(data=ActionData(NoOpActionTypes.consume_subscription.name, NoOpActionTypes.consume_subscription.name, {'subscription_id': self.subscription0.id}, state=ActionState.TEMPLATE))
        a02 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name, NoOpActionTypes.action_that_succeeds.name, state=ActionState.TEMPLATE))
        a03 = Action(data=ActionData(NoOpActionTypes.copy_hdfs_to_s3_action.name, NoOpActionTypes.copy_hdfs_to_s3_action.name, a_args, state=ActionState.TEMPLATE))
        a04 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name, NoOpActionTypes.action_that_succeeds.name, state=ActionState.TEMPLATE))
        self.action00, self.action01, self.action02, self.action03, self.action04 = \
            self.dart.save_actions([a00, a01, a02, a03, a04], workflow_id=self.workflow0.id)

        # --- action of workflow1: loads dataset1 ----------------------------
        # Note: unlike action00..action04, the attributes below hold the
        # unpacked-less return value of save_actions as-is.
        a10 = Action(data=ActionData(NoOpActionTypes.load_dataset.name, NoOpActionTypes.load_dataset.name, {'dataset_id': self.dataset1.id}, state=ActionState.TEMPLATE))
        self.action10 = self.dart.save_actions([a10], workflow_id=self.workflow1.id)

        # --- actions tied to the "no-show" datastore/workflow ---------------
        a20 = Action(data=ActionData(NoOpActionTypes.action_that_succeeds.name, NoOpActionTypes.action_that_succeeds.name, state=ActionState.HAS_NEVER_RUN))
        a21 = Action(data=ActionData(NoOpActionTypes.load_dataset.name, NoOpActionTypes.load_dataset.name, {'dataset_id': self.dataset2.id}, state=ActionState.TEMPLATE))
        self.action20 = self.dart.save_actions([a20], datastore_id=self.datastore2.id)
        self.action21 = self.dart.save_actions([a21], workflow_id=self.workflow2.id)

        # --- events ---------------------------------------------------------
        self.event1 = self.dart.save_event(Event(data=EventData('test-event1', state=EventState.ACTIVE)))
        self.event2 = self.dart.save_event(Event(data=EventData('test-event2-no-show', state=EventState.ACTIVE)))

        # --- event triggers: event1 -> workflow1, event2 -> workflow2 -------
        tr_args = {'event_id': self.event1.id}
        tr = Trigger(data=TriggerData('test-event-trigger1', 'event', [self.workflow1.id], tr_args, TriggerState.ACTIVE))
        self.event_trigger1 = self.dart.save_trigger(tr)

        tr_args = {'event_id': self.event2.id}
        tr = Trigger(data=TriggerData('test-event-trigger2-no-show', 'event', [self.workflow2.id], tr_args, TriggerState.ACTIVE))
        self.event_trigger2 = self.dart.save_trigger(tr)

        # --- super triggers -------------------------------------------------
        # super_trigger1 references event_trigger1 and has no workflow ids;
        # super_trigger2 references super_trigger1 and targets workflow1.
        st_args = {'fire_after': 'ALL', 'completed_trigger_ids': [self.event_trigger1.id]}
        st = Trigger(data=TriggerData('test-super-trigger1', 'super', None, st_args, TriggerState.ACTIVE))
        self.super_trigger1 = self.dart.save_trigger(st)

        st_args = {'fire_after': 'ANY', 'completed_trigger_ids': [self.super_trigger1.id]}
        st = Trigger(data=TriggerData('test-super-trigger2', 'super', [self.workflow1.id], st_args, TriggerState.ACTIVE))
        self.super_trigger2 = self.dart.save_trigger(st)
示例#26
0
    assert subscription.data.state == SubscriptionState.ACTIVE
    print 'done.'

    datastore = dart.save_datastore(
        Datastore(data=DatastoreData(name='weblogs_rmn_legacy',
                                     engine_name='emr_engine',
                                     state=DatastoreState.TEMPLATE,
                                     args={
                                         'data_to_freespace_ratio': 0.50,
                                     })))
    print 'created datastore: %s' % datastore.id

    workflow = dart.save_workflow(workflow=Workflow(data=WorkflowData(
        name='weblogs_rmn_legacy_parse_to_delimited',
        datastore_id=datastore.id,
        state=WorkflowState.ACTIVE,
        on_failure_email=['*****@*****.**', '*****@*****.**'],
        on_success_email=['*****@*****.**', '*****@*****.**'],
        on_started_email=['*****@*****.**', '*****@*****.**'],
    )),
                                  datastore_id=datastore.id)
    print 'created workflow: %s' % workflow.id

    a2 = dart.save_actions(actions=[
        Action(data=ActionData('consume_subscription',
                               'consume_subscription',
                               state=ActionState.TEMPLATE,
                               args={
                                   'subscription_id': subscription.id,
                                   'target_file_format': FileFormat.TEXTFILE,
                                   'target_row_format': RowFormat.DELIMITED,
                                   'target_compression': Compression.GZIP,