Пример #1
0
    def __init__(self, redis_conn, workflow_id, app_id, config):
        """Keep the minion identifiers and build its message catalogue.

        The Redis connection is stored and also wrapped in a
        ``StateControlRedis`` kept in ``self.state_control``. Every
        ``MNNxxx`` attribute is a ``(code, text)`` pair whose text was passed
        through ``_``.
        """
        self.redis_conn = redis_conn
        self.state_control = StateControlRedis(redis_conn)
        self.workflow_id, self.app_id = workflow_id, app_id
        self.config = config

        # Errors and messages
        self.MNN000 = 'MNN000', _('Success.')
        self.MNN001 = 'MNN001', _('Port output format not supported.')
        self.MNN002 = 'MNN002', _('Success getting data from task.')
        self.MNN003 = 'MNN003', _('State does not exists, processing app.')
        self.MNN004 = 'MNN004', _('Invalid port.')
        self.MNN005 = (
            'MNN005',
            _('Unable to retrieve data because a previous error.'),
        )
        self.MNN006 = (
            'MNN006',
            _('Invalid Python code or incorrect encoding: {}'),
        )
        self.MNN007 = 'MNN007', _('Job {} was canceled')
        self.MNN008 = 'MNN008', _('App {} was terminated')
        self.MNN009 = 'MNN009', _('Workflow specification is missing')
        self.MNN010 = (
            'MNN010',
            _('Task completed, but not executed (not used in the workflow).'),
        )

        # These texts are only used by the template file; they are listed
        # here so that gettext can detect them.
        self.msgs = [
            _('Task running'),
            _('Task completed'),
            _('Task running (cached data)'),
        ]
Пример #2
0
def test_minion_generate_output_success():
    """Every message given to ``_generate_output`` must be readable from the
    app output queue with the expected keys, app id and original payload."""
    app_id = 897987
    workflow_id = 6666
    expected_keys = sorted(
        ['date', 'status', 'message', 'workflow_id', 'app_id', 'code'])
    payloads = ["Message being sent \n{}", {'msg': 'Dictionary being sent'}]

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis:
        redis_conn = mocked_redis()
        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion._emit_event = dummy_emit_event
        state_control = StateControlRedis(redis_conn)

        for payload in payloads:
            minion._generate_output(payload)
            raw = state_control.pop_app_output_queue(app_id, False)
            envelope = json.loads(raw)
            assert sorted(envelope.keys()) == expected_keys
            assert envelope['app_id'] == app_id
            assert envelope['message'] == payload

        # Nothing else may be left on the queue.
        assert state_control.get_app_output_queue_size(app_id) == 0
Пример #3
0
    def start(self):
        """Run the master process.

        Installs ``_terminate_minions`` as the SIGTERM handler, connects to
        the Redis server named in the configuration, starts a minion for
        every ``queue_app_*`` key that still holds a message, and then
        serves the "start" queue forever through ``read_start_queue``.
        This method does not return.
        """
        signal.signal(signal.SIGTERM, self._terminate_minions)
        log.info(_('Starting master process. Reading "start" queue'))

        parsed_url = urlparse(self.config['juicer']['servers']['redis_url'])
        redis_conn = redis.StrictRedis(host=parsed_url.hostname,
                                       port=parsed_url.port,
                                       decode_responses=True)

        # Start pending minions
        apps = [q.split('_')[-1] for q in redis_conn.keys('queue_app_*')]
        self.state_control = StateControlRedis(redis_conn)

        for app_id in apps:
            # Peek (do not pop) the first message: the minion itself is
            # expected to consume it from the queue.
            pending = redis_conn.lrange('queue_app_{}'.format(app_id), 0, 0)
            if not pending:
                log.warning(_("Pending queue is empty"))
                continue

            msg = json.loads(pending[0])
            log.warning(_('Starting pending app_id {}').format(app_id))
            # FIXME: cluster
            cluster = msg['cluster']
            # Same lenient lookup used by read_start_queue: a message without
            # platform information falls back to 'spark' instead of aborting
            # the master at start-up.
            platform = msg.get('workflow', {}).get('platform', {}).get(
                'slug', 'spark')
            job_id = msg['job_id']

            self._start_minion(app_id,
                               app_id,
                               job_id,
                               self.state_control,
                               platform,
                               cluster=cluster)

        while True:
            self.read_start_queue(redis_conn)
Пример #4
0
    def watch_new_minion(self):
        """Track minions through Redis keyspace notifications.

        Enables keyspace events on the server and subscribes to the
        ``key_minion_app*`` keys. For each event whose channel ends with a
        numeric app id:

        * ``del`` / ``expired`` on a tracked minion removes it from
          ``self.active_minions``; if its app queue still holds a message a
          new minion is started (platform ``'spark'``, job id ``0``).
        * ``set`` on an untracked minion registers it with the pid read from
          the key and the next available port.

        ``KeyboardInterrupt`` ends the watch silently; a ``ConnectionError``
        is logged and followed by a one second pause.
        """
        try:
            log.info(_('Watching minions events.'))

            parsed_url = urlparse(
                self.config['juicer']['servers']['redis_url'])
            redis_conn = redis.StrictRedis(host=parsed_url.hostname,
                                           port=parsed_url.port)
            redis_conn.config_set('notify-keyspace-events', 'KE$gx')
            pub_sub = redis_conn.pubsub()
            pub_sub.psubscribe('__keyspace*__:key_minion_app*')
            for msg in pub_sub.listen():
                # This connection does not decode responses, so the channel
                # is bytes; the fallback must be bytes too, otherwise the
                # ``decode`` call fails on a str.
                app_id = msg.get('channel', b'').decode('utf8').split('_')[-1]
                if not app_id.isdigit():
                    continue

                app_id = int(app_id)
                key = (app_id, app_id)
                data = msg.get('data', '')
                if key in self.active_minions:
                    if data in (b'del', b'expired'):
                        del self.active_minions[key]
                        log.info(_('Minion {} finished.').format(app_id))
                        pending = redis_conn.lrange(
                            'queue_app_{}'.format(app_id), 0, 0)
                        if pending:
                            log.warning(
                                _('There are messages to process in app {} '
                                  'queue, starting minion.').format(
                                      app_id))
                            if self.state_control is None:
                                self.state_control = StateControlRedis(
                                    redis_conn)
                            # FIXME: Cluster and platform and job_id
                            log.debug('Pending message for app %s: %s',
                                      app_id, pending)
                            platform = 'spark'
                            self._start_minion(app_id, app_id, 0,
                                               self.state_control,
                                               platform)

                elif data == b'set':
                    # Externally launched minion
                    raw_info = redis_conn.get(
                        'key_minion_app_{}'.format(app_id))
                    if raw_info is None:
                        # The key may be deleted or expire between the
                        # notification and this read; skip the event instead
                        # of crashing the watcher.
                        log.warning(
                            _('Minion {} information is no longer '
                              'available.').format(app_id))
                        continue
                    minion_info = json.loads(raw_info.decode('utf8'))
                    port = self._get_next_available_port()
                    self.active_minions[key] = {
                        'pid': minion_info.get('pid'),
                        'port': port
                    }
                    log.info(
                        _('Minion {} joined (pid: {}, port: {}).').format(
                            app_id, minion_info.get('pid'), port))
        except KeyboardInterrupt:
            pass
        except ConnectionError as cx:
            log.exception(cx)
            time.sleep(1)
Пример #5
0
    def read_start_queue(self, redis_conn):
        """Pop one message from the "start" queue and route it to a minion.

        Execute-type messages are forwarded to the minion of the target app.
        A terminate message is forwarded and then the minion is terminated.
        Any other type is only logged.

        Errors never propagate to the caller: a ``ConnectionError`` is
        logged and followed by a one second pause, ``KeyboardInterrupt`` is
        ignored, and any other exception is logged and, when the app id is
        already known, reported on the app output queue (``je.code`` for a
        ``JuicerException``, ``500`` otherwise).

        :param redis_conn: Redis connection wrapped in a new
            ``StateControlRedis`` stored in ``self.state_control``.
        """
        # Stays None until the message is parsed, so the handlers below know
        # whether there is an app to notify.
        app_id = None
        try:
            self.state_control = StateControlRedis(redis_conn)
            # Process next message
            log.info(_('Reading "start" queue.'))
            msg = self.state_control.pop_start_queue()
            log.info(_('Forwarding message to minion.'))
            msg_info = json.loads(msg)

            # Extract message type and common parameters
            msg_type = msg_info['type']
            workflow_id = str(msg_info['workflow_id'])
            app_id = str(msg_info['app_id'])
            job_id = str(msg_info.get('job_id', 0))
            if msg_type in juicer_protocol.EXECUTE:
                platform = msg_info['workflow'].get('platform',
                                                    {}).get('slug', 'spark')
                cluster = msg_info['cluster']
                self._forward_to_minion(msg_type, workflow_id, app_id, job_id,
                                        msg, platform, cluster)

            elif msg_type == juicer_protocol.TERMINATE:
                cluster = msg_info.get('cluster')
                platform = msg_info.get('workflow',
                                        {}).get('platform',
                                                {}).get('slug', 'spark')
                # FIXME
                job_id = 0
                self._forward_to_minion(msg_type, workflow_id, app_id, job_id,
                                        msg, platform, cluster)
                self._terminate_minion(workflow_id, app_id)

            else:
                # ``warn`` is a deprecated alias that newer Python versions
                # no longer provide; ``warning`` is the supported name.
                log.warning(_('Unknown message type %s'), msg_type)

        except ConnectionError as cx:
            log.exception(cx)
            time.sleep(1)

        except JuicerException as je:
            log.exception(je)
            if app_id:
                self.state_control.push_app_output_queue(
                    app_id, json.dumps({
                        'code': je.code,
                        'message': str(je)
                    }))
        except KeyboardInterrupt:
            pass
        except Exception as ex:
            log.exception(ex)
            if app_id:
                self.state_control.push_app_output_queue(
                    app_id, json.dumps({
                        'code': 500,
                        'message': str(ex)
                    }))
Пример #6
0
def test_minion_perform_execute_reload_code_success():
    """Processing the same workflow twice with different transpiled code must
    leave the state produced by the newer code and emit one output message
    per execution."""
    workflow_id = '6666'
    app_id = '667788'
    job_id = '1'
    workflow = {
        'workflow_id': workflow_id,
        'app_id': app_id,
        'job_id': job_id,
        'type': 'execute',
        'workflow': ''
    }
    function_name = 'juicer.spark.transpiler.SparkTranspiler.transpile'
    fixture_path = os.path.join(os.path.dirname(__file__),
                                'fixtures/simple_workflow.json')

    with mock.patch('redis.StrictRedis', mock_strict_redis_client) \
            as mocked_redis, \
            mock.patch(function_name) as mocked_transpile, \
            mock.patch('juicer.workflow.workflow.Workflow'
                       '._build_initial_workflow_graph') as mocked_fn:
        mocked_fn.side_effect = lambda: ""
        # Setup for mocked_transpile
        mocked_transpile.side_effect = get_side_effect(None, None, 2)

        redis_conn = mocked_redis()
        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion.get_or_create_spark_session = \
            dummy_get_or_create_spark_session
        minion._emit_event = dummy_emit_event

        # Configure mocked redis
        state_control = StateControlRedis(redis_conn)
        with open(fixture_path) as fixture:
            workflow['workflow'] = json.load(fixture)
        serialized = json.dumps(workflow)

        # First run
        state_control.push_app_queue(app_id, serialized)
        minion._process_message()
        assert minion._state == {'res': 'version 1.0'}, 'Invalid state'

        # Executes the same workflow, but code should be different
        state_control.push_app_queue(app_id, json.dumps(workflow))
        output_size = state_control.get_app_output_queue_size(app_id)
        assert output_size == 1, 'Wrong number of output messages'

        state_control.pop_app_queue(app_id, True, 0)
        minion.transpiler.transpile = get_side_effect(None, None, 3)
        minion._process_message()

        assert minion._state == {'res': 'version 2.1'}, 'Invalid state'

        output_size = state_control.get_app_output_queue_size(app_id)
        assert output_size == 2, 'Wrong number of output messages'
Пример #7
0
def test_minion_perform_deliver_missing_state_invalid_port_failure():
    """A deliver request for an unknown port with no cached state must emit
    a WARNING (MNN003) followed by an ERROR (MNN004) and deliver an empty
    sample on the output queue."""
    workflow_id = '6666'
    app_id = '5001'
    job_id = '1'
    out_queue = 'queue_50001'
    task_id = 'f033f-284ab-28987e-232add'
    function_name = 'juicer.spark.transpiler.SparkTranspiler.transpile'

    with mock.patch('redis.StrictRedis', mock_strict_redis_client) \
            as mocked_redis, \
            mock.patch(function_name) as mocked_transpile, \
            mock.patch('juicer.workflow.workflow.Workflow'
                       '._build_initial_workflow_graph') as mocked_fn:
        mocked_fn.side_effect = lambda: ""
        # Setup for mocked_transpile
        mocked_transpile.side_effect = get_side_effect(
            get_records(), task_id, 0)
        redis_conn = mocked_redis()
        state_control = StateControlRedis(redis_conn)

        request = {
            'workflow_id': workflow_id,
            'app_id': app_id,
            'job_id': job_id,
            'type': 'deliver',
            'task_id': task_id,
            'port': 'port2',  # This port is invalid
            'output': out_queue,
            'workflow': {
                "tasks": [],
                "flows": []
            }
        }
        state_control.push_app_queue(app_id, json.dumps(request))

        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion.get_or_create_spark_session = \
            dummy_get_or_create_spark_session
        minion._emit_event = dummy_emit_event
        minion._state = {}
        minion._process_message()

        # Discard first status message
        state_control.pop_app_output_queue(app_id, False)

        # Then comes the warning about the missing state ...
        warning = json.loads(
            state_control.pop_app_output_queue(app_id, False))
        assert warning['status'] == 'WARNING', 'Invalid status'
        assert warning['code'] == minion.MNN003[0], 'Invalid code'

        # ... and finally the error about the invalid port.
        failure = json.loads(
            state_control.pop_app_output_queue(app_id, False))
        assert failure['status'] == 'ERROR', 'Invalid status'
        assert failure.get('code') == minion.MNN004[0], 'Invalid code'

        delivered = json.loads(state_control.pop_queue(out_queue, False))
        assert not delivered['sample'], 'Wrong CSV generated'
Пример #8
0
def test_minion_perform_execute_success():
    """Executing the fixture workflow must store the state returned by the
    mocked transpiled code and emit exactly one output message."""
    workflow_id = '6666'
    app_id = '897447'
    job_id = '1'
    function_name = 'juicer.spark.transpiler.SparkTranspiler.transpile'
    fixture_path = os.path.join(os.path.dirname(__file__),
                                'fixtures/simple_workflow.json')
    expected_state = {
        "xyz-647": {
            'port0': {
                'output': "df",
                'sample': []
            },
            'time': 27.27
        }
    }

    with mock.patch('redis.StrictRedis', mock_strict_redis_client) \
            as mocked_redis, \
            mock.patch(function_name) as mocked_transpile:
        # Setup for mocked_transpile
        mocked_transpile.side_effect = get_side_effect(None, 0, 1)

        redis_conn = mocked_redis()
        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion.get_or_create_spark_session = \
            dummy_get_or_create_spark_session
        minion._emit_event = dummy_emit_event

        # Configure mocked redis
        state_control = StateControlRedis(redis_conn)
        with open(fixture_path) as fixture:
            workflow_spec = json.load(fixture)

        request = {
            'workflow_id': workflow_id,
            'app_id': app_id,
            'job_id': job_id,
            'type': 'execute',
            'workflow': workflow_spec
        }
        state_control.push_app_queue(app_id, json.dumps(request))

        minion._process_message()
        assert minion._state == expected_state, 'Invalid state'

        output_size = state_control.get_app_output_queue_size(app_id)
        assert output_size == 1, 'Wrong number of output messages'
Пример #9
0
def test_minion_perform_deliver_success():
    """A deliver request for a task/port present in the minion state must
    report SUCCESS (MNN002) and push the CSV sample to the output queue."""
    workflow_id = '6666'
    app_id = '1000'
    job_id = '1'
    out_queue = 'queue_2000'
    task_id = '033f-284ab-28987e'

    sc = SparkContext(master='', conf=SparkConf())
    df0 = DataFrame(rdd=sc.parallelize(get_records()))

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis:
        redis_conn = mocked_redis()
        state_control = StateControlRedis(redis_conn)

        data = {
            'workflow_id': workflow_id,
            'app_id': app_id,
            'job_id': job_id,
            'type': 'deliver',
            'task_id': task_id,
            'port': 'port0',
            'output': out_queue,
            'workflow': ''
        }
        state_control.push_app_queue(app_id, json.dumps(data))

        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion._emit_event = dummy_emit_event
        # Pre-populate the state so no workflow has to be executed.
        minion._state = {
            task_id: {
                'port0': {
                    'output': df0,
                    'sample': []
                },
                'time': 35.92
            }
        }
        minion._process_message()

        # Discard first status message
        state_control.pop_app_output_queue(app_id, False)

        status = json.loads(state_control.pop_app_output_queue(app_id, False))
        assert status['status'] == 'SUCCESS', 'Invalid status'
        assert status['code'] == minion.MNN002[0], 'Invalid code'

        # CSV data
        csv_records = '\n'.join(
            dataframe_util.convert_to_csv(record) for record in get_records())

        delivered = json.loads(state_control.pop_queue(out_queue, False))
        assert delivered['sample'] == csv_records, 'Wrong CSV generated'
Пример #10
0
def test_runner_read_start_queue_workflow_not_started_failure():
    """When the (workflow, app) pair is already registered as an active
    minion, reading the start queue must not spawn a process nor set a
    minion status, and the start queue must end up empty."""
    config = {'juicer': {'servers': {'redis_url': "nonexisting.mock"}}}
    app_id = '1'
    workflow_id = '1000'
    start_message = {
        'workflow_id': workflow_id,
        'app_id': app_id,
        'type': 'execute',
        'workflow': {}
    }

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis, \
            mock.patch('subprocess.Popen') as mocked_popen:
        server = JuicerServer(config, 'faked_minions.py')
        mocked_redis_conn = mocked_redis()
        state_control = StateControlRedis(mocked_redis_conn)

        # Publishes a message to process data
        state_control.push_start_queue(json.dumps(start_message))

        # This workflow is being processed, should not start it again
        server.active_minions[(workflow_id, app_id)] = '_'

        # Start of testing
        server.read_job_start_queue(mocked_redis_conn)

        assert state_control.get_minion_status(app_id) is None
        assert not mocked_popen.called
        # Was command removed from the queue?
        assert state_control.pop_job_start_queue(False) is None

        assert state_control.get_app_queue_size(workflow_id) == 0
Пример #11
0
def test_runner_read_start_queue_success():
    """Reading an execute message from the start queue must launch a minion
    process, record its status, forward the message to the app queue, mark
    the workflow as started and acknowledge on the app output queue."""
    config = {'juicer': {'servers': {'redis_url': "nonexisting.mock"}}}
    app_id = '1'
    workflow_id = '1000'
    workflow = {
        'workflow_id': workflow_id,
        'app_id': app_id,
        'type': 'execute',
        'workflow': {}
    }

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis:
        with mock.patch('subprocess.Popen') as mocked_popen:
            server = JuicerServer(config,
                                  'faked_minions.py',
                                  config_file_path='config.yaml')
            mocked_redis_conn = mocked_redis()
            state_control = StateControlRedis(mocked_redis_conn)

            # Publishes a message to process data
            state_control.push_start_queue(json.dumps(workflow))

            # Start of testing
            server.read_job_start_queue(mocked_redis_conn)

            d1 = json.loads(state_control.get_minion_status(app_id))
            d2 = {
                "port": 36000,
                "pid": 1,
            }
            assert d1 == d2

            # Check that the process was launched before inspecting its
            # arguments, so that a missing launch is reported as a failed
            # assertion instead of an IndexError on an empty call list.
            assert mocked_popen.called
            assert mocked_popen.call_args_list[0][0][0] == [
                'nohup', sys.executable, 'faked_minions.py', '-w', workflow_id,
                '-a', app_id, '-t', 'spark', '-c', 'config.yaml'
            ]

            # Was command removed from the queue?
            assert state_control.pop_job_start_queue(False) is None

            assert json.loads(state_control.pop_app_queue(app_id)) == workflow

            assert state_control.get_workflow_status(
                workflow_id) == JuicerServer.STARTED
            assert json.loads(state_control.pop_app_output_queue(app_id)) == {
                'code': 0,
                'message': 'Minion is processing message execute'
            }
Пример #12
0
def test_minion_ping_success():
    """``_perform_ping`` must publish a READY status carrying the current
    pid and must not write anything to the app output queue."""
    app_id = 897987
    workflow_id = 6666

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis:
        redis_conn = mocked_redis()
        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion._emit_event = dummy_emit_event
        minion._perform_ping()

        state_control = StateControlRedis(redis_conn)
        reported = json.loads(state_control.get_minion_status(app_id))
        expected = {
            'status': 'READY',
            'pid': os.getpid()
        }

        assert reported == expected
        assert state_control.get_app_output_queue_size(app_id) == 0
Пример #13
0
def test_runner_read_start_queue_minion_already_running_success():
    """With a minion status already set for the app, the start message must
    be forwarded to the app queue without spawning a new process."""
    config = {'juicer': {'servers': {'redis_url': "nonexisting.mock"}}}
    app_id = 1
    workflow_id = 1000
    workflow = {
        'workflow_id': workflow_id,
        'app_id': app_id,
        'type': 'execute',
        'workflow': {}
    }
    expected_ack = {
        'code': 0,
        'message': 'Minion is processing message execute'
    }

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis, \
            mock.patch('subprocess.Popen') as mocked_popen:
        server = JuicerServer(config, 'faked_minions.py')
        mocked_redis_conn = mocked_redis()
        state_control = StateControlRedis(mocked_redis_conn)

        # Publishes a message to process data
        state_control.push_start_queue(json.dumps(workflow))
        state_control.set_minion_status(app_id, JuicerServer.STARTED)

        # Start of testing
        server.read_job_start_queue(mocked_redis_conn)

        minion_status = state_control.get_minion_status(app_id)
        assert minion_status == JuicerServer.STARTED

        assert not mocked_popen.called
        # Was command removed from the queue?
        assert mocked_redis_conn.lpop('start') is None
        forwarded = json.loads(state_control.pop_app_queue(app_id))
        assert forwarded == workflow

        workflow_status = state_control.get_workflow_status(workflow_id)
        assert workflow_status == JuicerServer.STARTED
        ack = json.loads(state_control.pop_app_output_queue(app_id))
        assert ack == expected_ack
Пример #14
0
def test_runner_read_start_queue_missing_details_failure():
    """A start message lacking the ``app_id`` key must be consumed without
    spawning a process, touching the app queues or setting a workflow
    status."""
    config = {'juicer': {'servers': {'redis_url': "nonexisting.mock"}}}
    app_id = 1
    workflow_id = 1000
    # 'xapp_id' is deliberately wrong: the message has no 'app_id' key
    malformed = {
        'workflow_id': workflow_id,
        'xapp_id': app_id,
        'type': 'execute',
        'workflow': {}
    }

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis, \
            mock.patch('subprocess.Popen') as mocked_popen:
        server = JuicerServer(config, 'faked_minions.py')
        mocked_redis_conn = mocked_redis()
        state_control = StateControlRedis(mocked_redis_conn)

        # Publishes a message to process data
        state_control.push_start_queue(json.dumps(malformed))
        state_control.set_minion_status(app_id, JuicerServer.STARTED)

        # Start of testing
        server.read_job_start_queue(mocked_redis_conn)

        minion_status = state_control.get_minion_status(app_id)
        assert minion_status == JuicerServer.STARTED

        assert not mocked_popen.called
        # Was command removed from the queue?
        assert state_control.pop_job_start_queue(block=False) is None
        assert state_control.pop_app_queue(app_id, block=False) is None
        leftover = state_control.pop_app_output_queue(app_id, block=False)
        assert leftover is None

        assert mocked_redis_conn.hget(workflow_id, 'status') is None
Пример #15
0
def test_runner_multiple_jobs_single_app():
    """
    - Start a juicer server
    - Instantiate a minion for an application
    - Submit more than one job to the same (workflow_id, app_id)
    - Assert that just one minion was launched
    """
    config = {'juicer': {'servers': {'redis_url': "nonexisting.mock"}}}
    app_id = 1
    workflow_id = 1000
    workflow = {
        'workflow_id': workflow_id,
        'app_id': app_id,
        'type': 'execute',
        'workflow': {}
    }
    submissions = 2

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis, \
            mock.patch('subprocess.Popen') as mocked_popen:
        server = JuicerServer(config, 'faked_minions.py')
        mocked_redis_conn = mocked_redis()
        state_control = StateControlRedis(mocked_redis_conn)

        # Publishes the same message once per submission
        for _unused in range(submissions):
            state_control.push_start_queue(json.dumps(workflow))

        # Start of testing: one read per published message
        for _unused in range(submissions):
            server.read_job_start_queue(mocked_redis_conn)

        assert len(server.active_minions) == 1
        assert mocked_popen.called
Пример #16
0
def test_minion_generate_invalid_code_failure():
    """When the mocked transpiler yields invalid code, processing the
    message must emit two output messages, the second being an ERROR whose
    text starts with 'Invalid Python code'."""
    workflow_id = '6666'
    app_id = '667788'
    job_id = '1'
    workflow = {
        'workflow_id': workflow_id,
        'app_id': app_id,
        'job_id': job_id,
        'type': 'execute',
        'workflow': ''
    }
    function_name = 'juicer.spark.transpiler.SparkTranspiler.transpile'
    fixture_path = os.path.join(os.path.dirname(__file__),
                                'fixtures/simple_workflow.json')

    with mock.patch('redis.StrictRedis', mock_strict_redis_client) \
            as mocked_redis, \
            mock.patch(function_name) as mocked_transpile, \
            mock.patch('juicer.workflow.workflow.Workflow'
                       '._build_initial_workflow_graph') as mocked_fn:
        mocked_fn.side_effect = lambda: ""
        # Setup for mocked_transpile
        mocked_transpile.side_effect = get_side_effect(None, None, 4)
        redis_conn = mocked_redis()
        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion._emit_event = dummy_emit_event

        # Configure mocked redis
        with open(fixture_path) as fixture:
            workflow['workflow'] = json.load(fixture)

        state_control = StateControlRedis(redis_conn)
        state_control.push_app_queue(app_id, json.dumps(workflow))
        minion._process_message()

        output_size = state_control.get_app_output_queue_size(app_id)
        assert output_size == 2, 'Wrong number of output messages'
        # Discards
        state_control.pop_app_output_queue(app_id)

        failure = json.loads(state_control.pop_app_output_queue(app_id))
        assert failure['status'] == 'ERROR'
        assert failure['message'][:19] == 'Invalid Python code'
Пример #17
0
def test_runner_multiple_jobs_multiple_apps():
    """
    - Start a juicer server
    - Instantiate two minions for two different applications
    - Submit jobs for both minions
    - Assert that two minions were launched
    """
    config = {'juicer': {'servers': {'redis_url': "nonexisting.mock"}}}

    app_id = 1
    workflow_id = 1000
    workflow1 = {
        'workflow_id': workflow_id,
        'app_id': app_id,
        'type': 'execute',
        'workflow': {}
    }
    workflow2 = {
        'workflow_id': workflow_id + 1,
        'app_id': app_id + 1,
        'type': 'execute',
        'workflow': {}
    }
    # Interleaved submissions: each app receives two jobs.
    submissions = [workflow1, workflow2, workflow2, workflow1]

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis, \
            mock.patch('subprocess.Popen') as mocked_popen:
        server = JuicerServer(config, 'faked_minions.py')
        mocked_redis_conn = mocked_redis()
        state_control = StateControlRedis(mocked_redis_conn)

        # Publishes the messages to process data
        for submission in submissions:
            state_control.push_start_queue(json.dumps(submission))

        # Start of testing: one read per published message
        for _unused in submissions:
            server.read_job_start_queue(mocked_redis_conn)

        assert len(server.active_minions) == 2
        assert mocked_popen.called
Пример #18
0
def test_runner_master_queue_client_shutdown_success():
    """A help ticket for an unhandled exception must make the server kill
    the registered minion pid (here ``os.kill`` is mocked to fail with
    ESRCH) and launch a replacement whose status is recorded."""
    config = {'juicer': {'servers': {'redis_url': "nonexisting.mock"}}}
    app_id = 1
    ticket = {
        "app_id": app_id,
        'reason': JuicerServer.HELP_UNHANDLED_EXCEPTION
    }
    # Status of the minion registered before the ticket arrives
    status = {'app_id': app_id, 'pid': 9999}

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis, \
            mock.patch('subprocess.Popen') as mocked_popen, \
            mock.patch('os.kill') as mocked_kill:
        server = JuicerServer(config, 'faked_minions.py')
        mocked_redis_conn = mocked_redis()
        state_control = StateControlRedis(mocked_redis_conn)

        # Configure minion
        state_control.set_minion_status(app_id, json.dumps(status))
        no_such_process = OSError()
        no_such_process.errno = errno.ESRCH
        mocked_kill.side_effect = no_such_process

        # Publishes a message to master queue
        state_control.push_master_queue(json.dumps(ticket))

        # Start of testing
        server.read_minion_support_queue(mocked_redis_conn)

        recorded = json.loads(state_control.get_minion_status(app_id))
        assert recorded == {"pid": 1, "port": 36000}

        assert mocked_popen.called

        mocked_kill.assert_called_once_with(status['pid'],
                                            signal.SIGKILL)
Пример #19
0
def test_minion_perform_deliver_missing_state_process_app_with_failure():
    """
    Deliver request for a task with no cached state, while the transpiler
    side effect produces invalid code: expect a WARNING (MNN003) followed by
    two ERRORs (MNN006, MNN005) and an empty sample on the output queue.
    """
    workflow_id = '6666'
    app_id = '6000'
    job_id = '1'
    out_queue = 'queue_2000'
    task_id = '033f-284ab-28987e'
    function_name = 'juicer.spark.transpiler.SparkTranspiler.transpile'

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis, \
            mock.patch(function_name) as mocked_transpile, \
            mock.patch('juicer.workflow.workflow.Workflow'
                       '._build_initial_workflow_graph') as mocked_fn:
        mocked_fn.side_effect = lambda: ""
        # Setup for mocked_transpile: invalid code
        mocked_transpile.side_effect = get_side_effect(
            get_records(), task_id, 4)
        redis_conn = mocked_redis()
        state_control = StateControlRedis(redis_conn)

        def next_output():
            # Pop (non-blocking) and decode the next app output message
            return json.loads(
                state_control.pop_app_output_queue(app_id, False))

        deliver_request = {
            'workflow_id': workflow_id,
            'app_id': app_id,
            'job_id': job_id,
            'type': 'deliver',
            'task_id': task_id,
            'port': 'port0',
            'output': out_queue,
            'workflow': {
                "tasks": [],
                "flows": []
            }
        }
        state_control.push_app_queue(app_id, json.dumps(deliver_request))

        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion._emit_event = dummy_emit_event
        minion._state = {}
        minion._process_message()

        # Discard first status message
        state_control.pop_app_output_queue(app_id, False)

        # First message is about missing state
        missing_state = next_output()
        assert missing_state['status'] == 'WARNING', 'Invalid status'
        assert missing_state['code'] == minion.MNN003[0], 'Invalid code'

        # Second message is about invalid Python code
        invalid_code = next_output()
        assert invalid_code['status'] == 'ERROR', 'Invalid status'
        assert invalid_code.get('code') == minion.MNN006[0], 'Invalid code'

        # Third message is about unable to read data
        no_data = next_output()
        assert no_data['status'] == 'ERROR', 'Invalid status'
        assert no_data.get('code') == minion.MNN005[0], 'Invalid code'

        assert state_control.get_app_output_queue_size(
            app_id) == 0, 'There are messages in app output queue!'

        delivered = json.loads(state_control.pop_queue(out_queue, False))
        assert not delivered['sample'], 'Wrong CSV generated'
Пример #20
0
class JuicerServer:
    """
    The JuicerServer is responsible for managing the lifecycle of minions.
    A minion controls an application, i.e., an active instance of a workflow.
    Thus, the JuicerServer receives launch requests from clients, launches
    and manages minion processes and takes care of their proper termination.
    """
    # Workflow status values. STARTED is written through
    # state_control.set_workflow_status() when a message is forwarded to a
    # minion; LOADED and TERMINATED are not referenced by the methods below.
    STARTED = 'STARTED'
    LOADED = 'LOADED'
    TERMINATED = 'TERMINATED'
    # Values for the 'reason' field of a help ticket sent to the master
    # queue (presumably by minions -- TODO confirm against the minion code).
    HELP_UNHANDLED_EXCEPTION = 1
    HELP_STATE_LOST = 2
    BANNER = """
     ██╗██╗   ██╗██╗ ██████╗███████╗██████╗ 
     ██║██║   ██║██║██╔════╝██╔════╝██╔══██╗
     ██║██║   ██║██║██║     █████╗  ██████╔╝
██   ██║██║   ██║██║██║     ██╔══╝  ██╔══██╗
╚█████╔╝╚██████╔╝██║╚██████╗███████╗██║  ██║
 ╚════╝  ╚═════╝ ╚═╝ ╚═════╝╚══════╝╚═╝  ╚═╝
"""

    def __init__(self,
                 config,
                 minion_executable,
                 log_dir='/tmp',
                 config_file_path=None):
        """
        Store the configuration, derive the port settings used when forking
        minions, register the SIGTERM handler and create the socket.io
        Redis manager used to emit job events.

        :param config: configuration dictionary; ``config['juicer']`` must
            contain ``servers.redis_url`` and may contain ``minion`` and
            ``log`` sections.
        :param minion_executable: path of the script run for each minion.
        :param log_dir: directory for minion logs; when falsy,
            ``juicer.log.path`` from the configuration (default ``/tmp``)
            is used instead.
        :param config_file_path: value passed to minions after ``-c``.
        """
        # Process handles, assigned by process()
        self.start_process = None
        self.minion_support_process = None
        self.new_minion_watch_process = None
        self.minion_status_process = None
        self.minion_watch_process = None
        self.state_control = None

        # (workflow_id, app_id) -> information about the launched minion
        self.active_minions = {}

        self.config = config
        configuration.set_config(config)
        self.config_file_path = config_file_path
        self.minion_executable = minion_executable
        if log_dir:
            self.log_dir = log_dir
        else:
            self.log_dir = self.config['juicer'].get('log', {}).get(
                'path', '/tmp')

        signal.signal(signal.SIGTERM, self._terminate)

        minion_conf = config['juicer'].get('minion', {})
        range_args = minion_conf.get('libprocess_port_range', [36000, 36500])
        self.port_range = list(range(*range_args))
        self.advertise_ip = minion_conf.get('libprocess_advertise_ip')

        # Minion requires 3 different ports:
        # 1 for libprocess/Mesos communication
        # 1 for driver port
        # 1 for block manager
        self.port_offset = minion_conf.get('port_offset', 100)

        redis_url = config['juicer']['servers']['redis_url']
        self.mgr = socketio.RedisManager(redis_url, 'job_output')

    def _emit_event(self, room, name, namespace, message, status, identifier,
                    **kwargs):
        data = {'message': message, 'status': status, 'id': identifier}
        data.update(kwargs)
        print('-' * 20)
        print('Emiting', data)
        print('-' * 20)
        self.mgr.emit(name, data=data, room=str(room), namespace=namespace)

    def start(self):
        """
        Body of the "master" process.

        Installs ``_terminate_minions`` as the SIGTERM handler, connects to
        the Redis server named in the configuration, launches a minion for
        every ``queue_app_*`` list that still holds a message and then reads
        the start queue forever. This method does not return.
        """
        signal.signal(signal.SIGTERM, self._terminate_minions)
        log.info(_('Starting master process. Reading "start" queue'))

        parsed_url = urlparse(self.config['juicer']['servers']['redis_url'])
        redis_conn = redis.StrictRedis(host=parsed_url.hostname,
                                       port=parsed_url.port,
                                       decode_responses=True)

        # Start pending minions: the app id is the last "_"-separated piece
        # of each queue name.
        apps = [q.split('_')[-1] for q in redis_conn.keys('queue_app_*')]
        self.state_control = StateControlRedis(redis_conn)

        for app_id in apps:
            # Peek at the head of the queue without removing it
            pending = redis_conn.lrange('queue_app_{}'.format(app_id), 0, 0)
            if not pending:
                log.warning(_("Pending queue is empty"))
                continue

            msg = json.loads(pending[0])
            log.warning(_('Starting pending app_id {}').format(app_id))

            # The pending message may be of any type forwarded to the minion
            # (a terminate message carries no 'workflow' or 'cluster'), so
            # read the fields with the same defaults read_start_queue uses
            # instead of failing the whole start-up with a KeyError.
            # FIXME: cluster
            cluster = msg.get('cluster')
            platform = msg.get('workflow', {}).get('platform',
                                                   {}).get('slug', 'spark')
            job_id = msg.get('job_id', 0)

            # NOTE(review): app_id is also passed as the workflow id, and the
            # launched minion is not recorded in active_minions -- confirm
            # both are intended.
            self._start_minion(app_id,
                               app_id,
                               job_id,
                               self.state_control,
                               platform,
                               cluster=cluster)

        while True:
            self.read_start_queue(redis_conn)

    # noinspection PyMethodMayBeStatic
    def read_start_queue(self, redis_conn):
        """
        Pop one message from the start queue and dispatch it.

        Execute-type messages are forwarded to the application's minion
        (launching one when needed); a terminate message is forwarded and the
        minion is then terminated. Any other type is logged and dropped.

        Errors never propagate: a ``ConnectionError`` is logged and followed
        by a one second pause, ``KeyboardInterrupt`` is ignored, and other
        failures are logged and, when the app id is already known, reported
        to the application's output queue (``JuicerException`` with its own
        code, anything else with code 500).

        :param redis_conn: Redis connection wrapped by ``StateControlRedis``.
        """
        app_id = None
        try:
            self.state_control = StateControlRedis(redis_conn)
            # Process next message
            log.info(_('Reading "start" queue.'))
            msg = self.state_control.pop_start_queue()
            log.info(_('Forwarding message to minion.'))
            msg_info = json.loads(msg)

            # Extract message type and common parameters
            msg_type = msg_info['type']
            workflow_id = str(msg_info['workflow_id'])
            app_id = str(msg_info['app_id'])
            job_id = str(msg_info.get('job_id', 0))
            if msg_type in juicer_protocol.EXECUTE:
                platform = msg_info['workflow'].get('platform',
                                                    {}).get('slug', 'spark')
                cluster = msg_info['cluster']
                self._forward_to_minion(msg_type, workflow_id, app_id, job_id,
                                        msg, platform, cluster)

            elif msg_type == juicer_protocol.TERMINATE:
                cluster = msg_info.get('cluster')
                platform = msg_info.get('workflow',
                                        {}).get('platform',
                                                {}).get('slug', 'spark')
                # FIXME
                job_id = 0
                self._forward_to_minion(msg_type, workflow_id, app_id, job_id,
                                        msg, platform, cluster)
                self._terminate_minion(workflow_id, app_id)

            else:
                # Logger.warn is a deprecated alias (removed in Python 3.13)
                log.warning(_('Unknown message type %s'), msg_type)

        except ConnectionError as cx:
            log.exception(cx)
            time.sleep(1)

        except JuicerException as je:
            log.exception(je)
            # app_id is only set once the message has been parsed
            if app_id:
                self.state_control.push_app_output_queue(
                    app_id, json.dumps({
                        'code': je.code,
                        'message': str(je)
                    }))
        except KeyboardInterrupt:
            pass
        except Exception as ex:
            log.exception(ex)
            if app_id:
                self.state_control.push_app_output_queue(
                    app_id, json.dumps({
                        'code': 500,
                        'message': str(ex)
                    }))

    def _forward_to_minion(self, msg_type, workflow_id, app_id, job_id, msg,
                           platform, cluster):
        """
        Make sure a minion exists for ``app_id`` and hand ``msg`` over to it.

        When no minion status is registered for the application, a new minion
        is started (after terminating a leftover entry for the same
        workflow/app pair) and recorded in ``active_minions``. The raw
        message is then pushed to the app queue, the workflow status is set
        to ``STARTED`` and a code-0 notice is pushed to the app output queue.
        """
        minion_key = (workflow_id, app_id)

        # Get minion status, if it exists
        status = self.state_control.get_minion_status(app_id)
        log.info(_('Minion status for (workflow_id=%s,app_id=%s): %s'),
                 workflow_id, app_id, status)

        if not status:
            # No status registered, so a new minion has to be launched.
            # Special case: the minion timed out but is still tracked here;
            # kill it before starting a new one.
            if minion_key in self.active_minions:
                self._terminate_minion(workflow_id, app_id)

            process = self._start_minion(workflow_id,
                                         app_id,
                                         job_id,
                                         self.state_control,
                                         platform,
                                         cluster=cluster)
            # FIXME Kubernetes
            pid = process.pid if process else 0
            self.active_minions[minion_key] = {
                'pid': pid,
                'process': process,
                'cluster': cluster,
                'port': self._get_next_available_port()
            }
        else:
            # A registered status means the minion is already running
            log.info(_('Minion (workflow_id=%s,app_id=%s) is running on %s.'),
                     workflow_id, app_id, platform)

        # Forward the message to the minion, which can be an execute or a
        # deliver command
        self.state_control.push_app_queue(app_id, msg)
        self.state_control.set_workflow_status(workflow_id, self.STARTED)

        log.info(
            _('Message %s forwarded to minion (workflow_id=%s,app_id=%s)'),
            msg_type, workflow_id, app_id)

        notice = {
            'code': 0,
            'message': 'Minion is processing message %s' % msg_type
        }
        self.state_control.push_app_output_queue(app_id, json.dumps(notice))

    def _start_minion(self,
                      workflow_id,
                      app_id,
                      job_id,
                      state_control,
                      platform,
                      restart=False,
                      cluster=None):
        """
        Start a minion with the launcher matching the cluster type.

        A cluster whose ``type`` is ``'KUBERNETES'`` is served by
        ``_start_kubernetes_minion``; anything else (including no cluster)
        by ``_start_subprocess_minion``. Returns whatever the chosen
        launcher returns.
        """
        log.info('Cluster: %s', cluster)
        if cluster is None:
            cluster = {}

        on_kubernetes = cluster.get('type') == 'KUBERNETES'
        launcher = (self._start_kubernetes_minion
                    if on_kubernetes else self._start_subprocess_minion)
        return launcher(workflow_id, app_id, job_id, state_control, platform,
                        restart, cluster)

    def _start_kubernetes_minion(self,
                                 workflow_id,
                                 app_id,
                                 job_id,
                                 state_control,
                                 platform,
                                 restart=False,
                                 cluster=None):
        """
        Create a Kubernetes job that runs the minion for this application.

        Emits an 'update job' event for ``job_id``, builds the minion command
        line, submits it through ``create_kb8s_job`` and registers a
        placeholder minion status (pid 1). ``restart`` is not used.

        :return: an empty dict (no local process object exists).
        """
        if cluster is None:
            cluster = {}
        from juicer.kb8s import create_kb8s_job

        self._emit_event(room=job_id,
                         namespace='/stand',
                         name='update job',
                         message=_('Creating a JOB in Kubernetes.'),
                         status='INFO',
                         identifier=job_id)

        minion_id = 'minion_{}_{}'.format(workflow_id, app_id)
        log.info(_('Starting minion %s in Kubernetes.'), minion_id)

        minion_cmd = ['python', '/usr/local/juicer/juicer/runner/minion.py']
        minion_cmd += ['-w', str(workflow_id)]
        minion_cmd += ['-a', str(app_id)]
        minion_cmd += ['-t', platform]
        minion_cmd += ['-c', self.config_file_path]
        log.info(_('Minion command: %s'), json.dumps(minion_cmd))
        create_kb8s_job(workflow_id, minion_cmd, cluster)

        # Expires in 300 seconds (enough to KB8s start the pod?)
        placeholder_status = json.dumps({'pid': 1})
        state_control.set_minion_status(app_id,
                                        placeholder_status,
                                        ex=300,
                                        nx=False)
        return {}

    def _start_subprocess_minion(self,
                                 workflow_id,
                                 app_id,
                                 job_id,
                                 state_control,
                                 platform,
                                 restart=False,
                                 cluster=None):
        """
        Fork a minion as a local child process and register its status.

        The child runs ``self.minion_executable`` under ``nohup`` with the
        current interpreter; its stdout/stderr are appended to
        ``minion_<workflow_id>_<app_id>_out.log`` / ``_err.log`` inside
        ``self.log_dir``. The libprocess, Spark driver and block manager
        ports are passed through environment variables.

        ``job_id``, ``restart`` and ``cluster`` are accepted so the signature
        matches the Kubernetes launcher; they are not used here.

        :return: the ``subprocess.Popen`` object of the child.
        """
        minion_id = 'minion_{}_{}'.format(workflow_id, app_id)
        stdout_log = os.path.join(self.log_dir, minion_id + '_out.log')
        stderr_log = os.path.join(self.log_dir, minion_id + '_err.log')
        log.info(_('Forking minion %s.'), minion_id)

        port = self._get_next_available_port()

        # Setup command and launch the minion script. We return the subprocess
        # created as part of an active minion.
        # spark.driver.port and spark.driver.blockManager.port are required
        # when running the driver inside a docker container.
        minion_cmd = [
            'nohup',
            sys.executable,
            self.minion_executable,
            '-w',
            str(workflow_id),
            '-a',
            str(app_id),
            '-t',
            platform,
            '-c',
            self.config_file_path,
        ]
        log.info(_('Minion command: %s'), json.dumps(minion_cmd))

        # Mesos / libprocess configuration. See:
        # http://mesos.apache.org/documentation/latest/configuration/libprocess/
        cloned_env = os.environ.copy()
        cloned_env['LIBPROCESS_PORT'] = str(port)
        cloned_env['SPARK_DRIVER_PORT'] = str(port + self.port_offset)
        cloned_env['SPARK_DRIVER_BLOCKMANAGER_PORT'] = str(
            port + 2 * self.port_offset)

        if self.advertise_ip is not None:
            cloned_env['LIBPROCESS_ADVERTISE_IP'] = self.advertise_ip

        # The child receives its own copies of the log file descriptors, so
        # the parent's handles are closed as soon as the process is spawned;
        # otherwise the server leaks two descriptors per launched minion.
        with open(stdout_log, 'a') as out_file, \
                open(stderr_log, 'a') as err_file:
            proc = subprocess.Popen(minion_cmd,
                                    stdout=out_file,
                                    stderr=err_file,
                                    env=cloned_env)

        # Register the status with ex=30 and nx=False (presumably a 30 second
        # expiry that overwrites any existing value, following the Redis SET
        # options -- TODO confirm against StateControlRedis).
        status = json.dumps({'pid': int(proc.pid), 'port': port})
        state_control.set_minion_status(app_id, status, ex=30, nx=False)
        return proc

    def _terminate_minion(self, workflow_id, app_id):
        """
        Terminate the minion registered for ``(workflow_id, app_id)``.

        For a minion recorded with a ``'KUBERNETES'`` cluster the job is
        deleted through ``delete_kb8s_job``. Otherwise the recorded pid
        receives SIGTERM and the entry is removed from ``active_minions``.
        A process that has already exited, or an entry without a usable pid,
        still has its entry removed so that its port can be reused. An
        unknown pair is only logged.

        :raises OSError: when signalling fails for a reason other than the
            process no longer existing (the entry is then kept).
        """
        # Function-scope import: only this error check needs it
        import errno

        key = (workflow_id, app_id)
        if key not in self.active_minions:
            log.warning('(%s, %s) not in active minions ', workflow_id,
                        app_id)
        log.info(_("Terminating (workflow_id=%s,app_id=%s)"), workflow_id,
                 app_id)

        minion_data = self.active_minions.get(key)
        cluster = minion_data.get('cluster', {}) if minion_data else None
        if cluster is not None and cluster.get('type') == 'KUBERNETES':
            # try to kill Job in KB8s
            # NOTE(review): the entry stays in active_minions in this branch,
            # so its port remains reserved -- confirm this is intended.
            delete_kb8s_job(workflow_id, cluster)
        elif key in self.active_minions:
            pid = self.active_minions[key].get('pid')
            if pid:
                try:
                    os.kill(pid, signal.SIGTERM)
                except OSError as err:
                    # ESRCH: the process is already gone, which is the goal
                    if err.errno != errno.ESRCH:
                        raise
                    log.info(
                        'Minion process %s (workflow_id=%s,app_id=%s) '
                        'had already exited', pid, workflow_id, app_id)
            else:
                # pid 0 would signal the server's whole process group and
                # None is not a valid argument for os.kill.
                log.warning(
                    'No pid recorded for (workflow_id=%s,app_id=%s); '
                    'nothing to signal', workflow_id, app_id)
            del self.active_minions[key]

    def minion_support(self):
        """
         Control minion resource allocation and execution.
         Improve: define a parameter for sleeping time
        """
        # while True:
        #     print(self.active_minions)
        #     for (workflow_id, app_id), minion_data in list(
        #             self.active_minions.items()):
        #         cluster = minion_data.get('cluster', {})
        #         if cluster is not None and cluster.get('type') == 'KUBERNETES'
        #             eval_and_kill_pending_jobs(cluster)
        #     time.sleep(10)
        pass

    #
    # def read_minion_support_queue(self, redis_conn):
    #     try:
    #         state_control = StateControlRedis(redis_conn)
    #         ticket = json.loads(state_control.pop_master_queue())
    #         workflow_id = ticket.get('workflow_id')
    #         app_id = ticket.get('app_id', ticket.get('workflow_id'))
    #         reason = ticket.get('reason')
    #         log.info(_("Master received a ticket for app %s"), app_id)
    #         if reason == self.HELP_UNHANDLED_EXCEPTION:
    #             # Let's kill the minion and start another
    #             minion_info = json.loads(
    #                 state_control.get_minion_status(app_id))
    #             while True:
    #                 try:
    #                     os.kill(minion_info['pid'], signal.SIGKILL)
    #                 except OSError as err:
    #                     if err.errno == errno.ESRCH:
    #                         break
    #                 time.sleep(.5)
    #
    #             # Review with cluster
    #             # FIXME: platform
    #             platform = 'spark'
    #             self._start_minion(workflow_id, app_id, state_control,
    #                                platform)
    #
    #         elif reason == self.HELP_STATE_LOST:
    #             pass
    #         else:
    #             log.warn(_("Unknown help reason %s"), reason)
    #     except KeyboardInterrupt:
    #         pass
    #     except ConnectionError as cx:
    #         log.exception(cx)
    #         time.sleep(1)
    #
    #     except Exception as ex:
    #         log.exception(ex)

    def _get_next_available_port(self):
        used_ports = set(
            [minion['port'] for minion in list(self.active_minions.values())])
        for i in self.port_range:
            if i not in used_ports:
                return i
        raise ValueError(
            _('Unable to launch minion: there is not available '
              'port for libprocess.'))

    def watch_new_minion(self):
        """
        Follow Redis keyspace notifications for ``key_minion_app*`` keys and
        keep ``active_minions`` in step with them.

        * A ``del``/``expired`` event for a tracked minion removes it; if its
          app queue still holds a message, a new minion is started.
        * A ``set`` event for an untracked app id registers the (externally
          launched) minion with the pid stored in the key and the next free
          port.

        Only channels whose last ``_``-separated piece is numeric are
        handled. ``KeyboardInterrupt`` ends the loop silently; a
        ``ConnectionError`` is logged, followed by a one second pause, and
        also ends the loop.
        """
        try:
            log.info(_('Watching minions events.'))

            parsed_url = urlparse(
                self.config['juicer']['servers']['redis_url'])
            redis_conn = redis.StrictRedis(host=parsed_url.hostname,
                                           port=parsed_url.port)
            redis_conn.config_set('notify-keyspace-events', 'KE$gx')
            pub_sub = redis_conn.pubsub()
            pub_sub.psubscribe('__keyspace*__:key_minion_app*')
            for msg in pub_sub.listen():
                # This connection does not decode responses, so the channel
                # is bytes; the default must be bytes too for .decode().
                channel = msg.get('channel', b'').decode('utf8')
                app_id = channel.split('_')[-1]
                if not app_id.isdigit():
                    continue

                app_id = int(app_id)
                key = (app_id, app_id)
                data = msg.get('data', '')

                if key in self.active_minions:
                    if data == b'del' or data == b'expired':
                        del self.active_minions[key]
                        log.info(_('Minion {} finished.').format(app_id))
                        pending = redis_conn.lrange(
                            'queue_app_{}'.format(app_id), 0, 0)
                        if pending:
                            log.warning(
                                _('There are messages to process in app {} '
                                  'queue, starting minion.').format(app_id))
                            if self.state_control is None:
                                self.state_control = StateControlRedis(
                                    redis_conn)
                            # FIXME: Cluster and platform and job_id
                            print('-' * 10)
                            print(pending)
                            print('-' * 10)
                            platform = 'spark'
                            self._start_minion(app_id, app_id, 0,
                                               self.state_control, platform)

                elif data == b'set':
                    # Externally launched minion
                    raw_status = redis_conn.get(
                        'key_minion_app_{}'.format(app_id))
                    if raw_status is None:
                        # The key was removed or expired between the
                        # notification and this read; nothing to register.
                        log.warning(
                            'Status of minion %s vanished before it could '
                            'be read', app_id)
                        continue
                    minion_info = json.loads(raw_status.decode('utf8'))
                    port = self._get_next_available_port()
                    self.active_minions[key] = {
                        'pid': minion_info.get('pid'),
                        'port': port
                    }
                    log.info(
                        _('Minion {} joined (pid: {}, port: {}).').format(
                            app_id, minion_info.get('pid'), port))
        except KeyboardInterrupt:
            pass
        except ConnectionError as cx:
            log.exception(cx)
            time.sleep(1)

    def process(self):
        """
        Run the server: create the "master", "help_desk" and "minion_status"
        processes (all non-daemonic), start them in that order and wait for
        them. A ``KeyboardInterrupt`` while waiting triggers ``_terminate``.
        """
        log.info(_('Juicer server started (pid=%s)'), os.getpid())

        self.start_process = multiprocessing.Process(name="master",
                                                     target=self.start)
        self.minion_support_process = multiprocessing.Process(
            name="help_desk", target=self.minion_support)
        self.new_minion_watch_process = multiprocessing.Process(
            name="minion_status", target=self.watch_new_minion)

        children = (self.start_process, self.minion_support_process,
                    self.new_minion_watch_process)
        for child in children:
            child.daemon = False
        for child in children:
            child.start()

        try:
            for child in children:
                child.join()
        except KeyboardInterrupt:
            self._terminate(None, None)

    # noinspection PyUnusedLocal
    def _terminate_minions(self, _signal, _frame):
        """
        Signal handler: terminate every minion currently listed in
        ``active_minions`` and exit the process with status 0.
        """
        log.info(_('Terminating %s active minions'), len(self.active_minions))
        # Iterate over a snapshot: terminating a minion may delete its entry
        for wid, aid in list(self.active_minions):
            self._terminate_minion(wid, aid)
        sys.exit(0)

    # noinspection PyUnusedLocal
    def _terminate(self, _signal, _frame):
        """
        Stop the server's helper processes.

        Registered as the SIGTERM handler in ``__init__`` and also called
        directly by ``process()`` on ``KeyboardInterrupt``. The master
        process receives SIGTERM, so the handler it installs in ``start()``
        can terminate the minions; the support and watcher processes receive
        SIGKILL. Processes that were never started or have already exited
        are skipped, so one dead child does not keep the others alive.

        :raises OSError: when signalling fails for a reason other than the
            process no longer existing.
        """
        # Function-scope import: only this error check needs it
        import errno

        log.info(_('Killing juicer server subprocesses and terminating'))
        # self.minion_watch_process is intentionally not signalled (it is
        # never assigned a process in this class).
        targets = (
            (self.start_process, signal.SIGTERM),
            (self.minion_support_process, signal.SIGKILL),
            (self.new_minion_watch_process, signal.SIGKILL),
        )
        for child, sig in targets:
            # pid is None until the process has been started
            if not child or child.pid is None:
                continue
            try:
                os.kill(child.pid, sig)
            except OSError as err:
                # ESRCH: the child has already exited
                if err.errno != errno.ESRCH:
                    raise
Пример #21
0
def test_minion_spark_configuration():
    """
    - Start a juicer server
    - Instantiate one minion passing specific configurations w.r.t. runtime
      environments (app_configs)
    - Assert that the spark context within the minion inherited the same
      configs that it was supposed to inherit
    """

    try:
        # noinspection PyUnresolvedReferences
        from pyspark.sql import SparkSession
    except ImportError:
        # we will skip this test because pyspark is not installed
        return

    workflow_id = '6666'
    app_id = '667788'
    job_id = '1'
    app_configs = {'spark.master': 'local[3]', 'config1': '1', 'config2': '2'}
    function_name = 'juicer.spark.transpiler.SparkTranspiler.transpile'

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis, \
            mock.patch(function_name) as mocked_transpile, \
            mock.patch('juicer.workflow.workflow.Workflow'
                       '._build_initial_workflow_graph') as mocked_fn:
        mocked_fn.side_effect = lambda: ""
        # Setup for mocked_transpile
        mocked_transpile.side_effect = get_side_effect(None, 0, 1)

        redis_conn = mocked_redis()
        minion = SparkMinion(redis_conn=redis_conn,
                             workflow_id=workflow_id,
                             app_id=app_id,
                             config=config)
        minion._emit_event = dummy_emit_event

        # Configure mocked redis
        state_control = StateControlRedis(redis_conn)
        fixture_path = os.path.join(os.path.dirname(__file__),
                                    'fixtures/simple_workflow.json')
        with open(fixture_path) as f:
            data = json.loads(f.read())

        execute_request = {
            'workflow_id': workflow_id,
            'app_id': app_id,
            'job_id': job_id,
            'type': 'execute',
            'app_configs': app_configs,
            'workflow': data
        }
        state_control.push_app_queue(app_id, json.dumps(execute_request))

        minion._process_message()

        # check spark session health
        assert minion.spark_session is not None
        assert minion.is_spark_session_available()

        # check configs
        spark_context = minion.spark_session.sparkContext
        ctx_configs = dict(spark_context.getConf().getAll())
        for option, expected in app_configs.items():
            assert ctx_configs[option] == expected

        # check app name
        name = minion.spark_session.sparkContext.appName
        assert name == u'{}(workflow_id={},app_id={})'.format(
            data['name'], workflow_id, app_id)

        # check proper termination
        minion.terminate()
        assert not minion.is_spark_session_available()

        # Skip one message, then inspect the termination notice
        state_control.pop_app_output_queue(app_id, False)
        msg = json.loads(state_control.pop_app_output_queue(app_id, False))
        assert msg['status'] == 'SUCCESS', 'Invalid status'
        assert msg['code'] == minion.MNN008[0], 'Invalid code'
Пример #22
0
def test_runner_minion_termination():
    """
    - Start a juicer server
    - Instantiate two minions
    - Kill the first, assert that it was killed and the other remains
    - Kill the second, assert that all minions were killed and their state
      cleaned
    """

    try:
        from pyspark.sql import SparkSession
    except ImportError:
        # we will skip this test because pyspark is not installed
        return

    def execute_msg(wf_id, ap_id):
        # Start-queue message asking for a workflow execution
        return {
            'workflow_id': wf_id,
            'app_id': ap_id,
            'type': 'execute',
            'workflow': {}
        }

    def terminate_msg(wf_id, ap_id):
        # Start-queue message asking for the app to be terminated
        return {
            'workflow_id': wf_id,
            'app_id': ap_id,
            'type': 'terminate',
        }

    config = {'juicer': {'servers': {'redis_url': "nonexisting.mock"}}}
    app_id = 1
    workflow_id = 1000

    workflow1 = execute_msg(workflow_id, app_id)
    workflow1_kill = terminate_msg(workflow_id, app_id)
    workflow2 = execute_msg(workflow_id + 1, app_id + 1)
    workflow2_kill = terminate_msg(workflow_id + 1, app_id + 1)

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis:
        config_file_path = os.path.join(os.path.dirname(__file__), 'fixtures',
                                        'juicer-server-config.yaml')
        server = JuicerServer(config,
                              'faked_minions.py',
                              config_file_path=config_file_path)
        mocked_redis_conn = mocked_redis()
        state_control = StateControlRedis(mocked_redis_conn)

        # Publish both execution requests
        for request in (workflow1, workflow2):
            state_control.push_start_queue(json.dumps(request))

        # Start of testing: one read per queued request
        server.read_job_start_queue(mocked_redis_conn)
        server.read_job_start_queue(mocked_redis_conn)

        assert len(server.active_minions) == 2

        # kill first minion
        state_control.push_start_queue(json.dumps(workflow1_kill))
        server.read_job_start_queue(mocked_redis_conn)
        assert len(server.active_minions) == 1

        # kill second minion
        state_control.push_start_queue(json.dumps(workflow2_kill))
        server.read_job_start_queue(mocked_redis_conn)
        assert len(server.active_minions) == 0
Пример #23
0
def test_minion_terminate():
    """
    Check how a SparkMinion reacts to job cancellation and app termination.

    Three messages are pushed to the app queue and processed in order:
    an 'execute' message, a 'terminate' message carrying a job_id and a
    'terminate' message without job_id. After each terminate message the
    app output queue is inspected for the expected status and code, and the
    state of the spark session is asserted.

    Returns early, without asserting anything, when pyspark cannot be
    imported.
    """
    try:
        # Imported only to find out whether pyspark is available
        from pyspark.sql import SparkSession
    except ImportError:
        # we will skip this test because pyspark is not installed
        return

    workflow_id = '6666'
    app_id = '897447'
    job_id = '1'
    function_name = 'juicer.spark.transpiler.SparkTranspiler.transpile'

    with mock.patch('redis.StrictRedis',
                    mock_strict_redis_client) as mocked_redis:
        with mock.patch(function_name) as mocked_transpile:
            # Setup for mocked_transpile
            mocked_transpile.side_effect = get_side_effect(None, 0, 5)

            redis_conn = mocked_redis()
            minion = SparkMinion(redis_conn=redis_conn,
                                 workflow_id=workflow_id,
                                 app_id=app_id,
                                 config=config)
            minion._emit_event = dummy_emit_event

            # Configure mocked redis
            state_control = StateControlRedis(redis_conn)
            with open(
                    os.path.join(os.path.dirname(__file__),
                                 'fixtures/simple_workflow.json')) as f:
                data = json.load(f)

            # execute message
            state_control.push_app_queue(
                app_id,
                json.dumps({
                    'workflow_id': workflow_id,
                    'app_id': app_id,
                    'job_id': job_id,
                    'type': 'execute',
                    'workflow': data
                }))
            minion._process_message_nb()
            # discard extra message
            state_control.pop_app_output_queue(app_id, False)

            # job termination
            state_control.push_app_queue(
                app_id,
                json.dumps({
                    'workflow_id': workflow_id,
                    'app_id': app_id,
                    'job_id': job_id,
                    'type': 'terminate'
                }))
            minion._process_message()

            # discard extra message
            state_control.pop_app_output_queue(app_id, False)

            # first the spark app will throw an exception regarding the job
            # canceling
            msg = json.loads(state_control.pop_app_output_queue(app_id, False))
            assert msg['status'] == 'ERROR', 'Invalid status'
            assert msg['code'] == 1000, 'Invalid code'

            # second the minion will report success for the job canceling
            # operation
            msg = json.loads(state_control.pop_app_output_queue(app_id, False))
            assert msg['status'] == 'SUCCESS', 'Invalid status'
            # The code is read through the instance: the Minion class shown
            # in this file assigns the MNN* tuples inside __init__, and an
            # instance lookup also finds them if they are class attributes.
            assert msg['code'] == minion.MNN007[0], 'Invalid code'

            # assert app still alive
            assert minion.spark_session is not None
            assert minion.is_spark_session_available()

            # app termination
            state_control.push_app_queue(
                app_id,
                json.dumps({
                    'workflow_id': workflow_id,
                    'app_id': app_id,
                    'type': 'terminate'
                }))
            minion._process_message()
            # discard extra message
            state_control.pop_app_output_queue(app_id, False)

            msg = json.loads(state_control.pop_app_output_queue(app_id, False))
            assert msg['status'] == 'SUCCESS', 'Invalid status'
            assert msg['code'] == minion.MNN008[0], 'Invalid code'

            # assert app is no longer alive
            assert minion.spark_session is None
            assert not minion.is_spark_session_available()

            # terminating again must leave the session unavailable
            minion.terminate()
            assert not minion.is_spark_session_available()
Пример #24
0
class Minion:
    MSG_PROCESSED = 'message_processed'

    def __init__(self, redis_conn, workflow_id, app_id, config):
        """
        Keep the constructor arguments on the instance, wrap the connection
        in a StateControlRedis and register the translated message catalog.

        :param redis_conn: connection object handed to StateControlRedis
        :param workflow_id: stored as self.workflow_id
        :param app_id: stored as self.app_id
        :param config: stored unchanged as self.config
        """
        self.config = config
        self.workflow_id, self.app_id = workflow_id, app_id
        self.redis_conn = redis_conn
        self.state_control = StateControlRedis(self.redis_conn)

        # Errors and messages: every (code, text) pair is stored on the
        # instance under its own code (self.MNN000, self.MNN001, ...). The
        # literals stay inside _() calls so gettext can still detect them.
        catalog = (
            ('MNN000', _('Success.')),
            ('MNN001', _('Port output format not supported.')),
            ('MNN002', _('Success getting data from task.')),
            ('MNN003', _('State does not exists, processing app.')),
            ('MNN004', _('Invalid port.')),
            ('MNN005',
             _('Unable to retrieve data because a previous error.')),
            ('MNN006', _('Invalid Python code or incorrect encoding: {}')),
            ('MNN007', _('Job {} was canceled')),
            ('MNN008', _('App {} was terminated')),
            ('MNN009', _('Workflow specification is missing')),
            ('MNN010',
             _('Task completed, but not executed (not used in the workflow).')),
        )
        for entry in catalog:
            setattr(self, entry[0], entry)

        # Used in the template file, declared here to gettext detect them
        self.msgs = [_('Task running'), _('Task completed'),
                     _('Task running (cached data)')]

    def process(self):
        raise NotImplementedError()

    def _generate_output(self, message, status=None, code=None):
        """
        Sends feedback about execution of this minion.
        """
        obj = {
            'message': message,
            'workflow_id': self.workflow_id,
            'app_id': self.app_id,
            'code': code,
            'date': datetime.datetime.now().isoformat(),
            'status': status if status is not None else 'OK'
        }

        m = json.dumps(obj)
        self.state_control.push_app_output_queue(self.app_id, m)

    def _perform_ping(self):
        status = {
            'status': 'READY',
            'pid': os.getpid(),
        }
        self.state_control.set_minion_status(self.app_id,
                                             json.dumps(status),
                                             ex=10,
                                             nx=False)

    @staticmethod
    def reload_code(q):
        """
        Watch _watch_dir recursively for all pyinotify events and run the
        notifier loop, with a new EventHandler receiving the events.

        :param q: not used by this method
        """
        watch_manager = pyinotify.WatchManager()
        event_notifier = pyinotify.Notifier(watch_manager, EventHandler())
        watch_manager.add_watch(_watch_dir, pyinotify.ALL_EVENTS, rec=True)
        event_notifier.loop()

    def ping(self, q):
        """
        Pings redis to inform master this minion is online.

        Logs 'Start ping', then keeps calling _perform_ping() followed by a
        5 second sleep for as long as q.empty() is true.

        :param q: object exposing empty(); the loop ends once it returns a
            false value (presumably a queue used as a stop signal - confirm
            with the caller)
        """
        log.info('Start ping')
        while q.empty():
            self._perform_ping()
            time.sleep(5)