Exemplo n.º 1
0
class DataServer:
    """Serves cached task results to peer workers over a ZMQ reply socket."""

    def __init__(self, config):
        self._cache = Cache(config)
        self._host = config['host']
        self._port = config['port']
        self._encodingType = config['encoding']
        self._encoding = Encoding(self._encodingType)

        self._adapter = ZMQServers(self._port, self._createReply, config)
        # Pre-encoded (header, payload) reply reused for every unknown task id.
        self.notAvailable = self._encoding.encode(
            self._createError('notAvailable', 'taskId notAvailable'))

    def listen(self):
        """Log the bind address and start serving via the ZMQ adapter."""
        log.info('discovery serving on {host}:{port} with {encoding} encoding',
                 host=self._host,
                 port=self._port,
                 encoding=self._encodingType)
        self._adapter.listen()

    @timing
    def _createReply(self, message):
        """Build the flat multipart reply (header, payload, header, payload, ...)."""
        try:
            request = self._encoding.decode(value=message, plainEncode=True)
            pairs = self.getDataByTaskId(request.get('tasks'))
        except Exception as e:
            header, payload = self._encoding.encode(
                self._createError('unknown', str(e)))
            return [header, payload]
        frames = []
        for pair in pairs:
            frames.extend(pair)
        return frames

    def getDataByTaskId(self, tasks):
        """Return one (header, payload) tuple per task id; misses get notAvailable."""
        return [
            self._cache.getWithHeader(task)
            if task in self._cache else self.notAvailable
            for task in tasks
        ]

    def setSendingState(self, taskId, header, encoded, size):
        return self._cache.update(taskId, encoded, size=size, header=header)

    def _createError(self, code, message):
        return {'hkube_error': {'code': code, 'message': message}}

    def isLocal(self, host, port):
        """True when (host, port) is this server's own bind address."""
        return host == self._host and port == self._port

    def isServing(self):
        return self._adapter.isServing()

    def shutDown(self):
        self._adapter.close()
Exemplo n.º 2
0
class DataRequest:
    """One-shot ZMQ request for task results from a remote DataServer.

    ``reqDetails`` keys: ``encoding``, ``address`` (dict with host/port),
    ``timeout``, ``networkTimeout``, ``tasks``.
    """

    def __init__(self, reqDetails):
        encoding = reqDetails.get('encoding')
        address = reqDetails.get('address')
        timeout = reqDetails.get('timeout')
        networkTimeout = reqDetails.get('networkTimeout')
        tasks = reqDetails.get('tasks')
        options = {u'tasks': tasks}
        self.tasks = tasks
        self.encoding = Encoding(encoding)
        content = self.encoding.encode(options, plainEncode=True)
        self.request = dict()
        self.request.update(address)
        self.request.update({
            "content": content,
            "timeout": timeout,
            "networkTimeout": networkTimeout
        })

    @timing
    def invoke(self):
        """Send the request and decode the reply frames.

        Returns a list of ``(size, decodedValue)`` tuples, one per task.
        On any failure an ``(0, error)`` entry is returned for every task.
        """
        # BUG FIX: bind `adapter` before the try block. Previously, if
        # ZMQRequest(...) (or the log call) raised, the finally clause hit an
        # UnboundLocalError on `adapter.close()`, masking the real exception.
        adapter = None
        try:
            log.info('tcp://{host}:{port}',
                     host=self.request['host'],
                     port=self.request['port'])
            adapter = ZMQRequest(self.request)
            responseFrames = adapter.invokeAdapter()
            results = []
            # Frames arrive as alternating header/content pairs.
            for i in range(0, int(len(responseFrames) / 2)):
                header = responseFrames[i * 2]
                content = responseFrames[i * 2 + 1]
                decoded = self.encoding.decode(header=header, value=content)
                results.append((len(content), decoded))
            return results
        except Exception as e:
            results = []
            for _ in self.tasks:
                results.append((0, self._createError('unknown', str(e))))
            return results
        finally:
            if adapter is not None:
                adapter.close()

    def _createError(self, code, message):
        return {'hkube_error': {'code': code, 'message': message}}
class MessageListener:
    """Listens for streamed messages from a remote node and fans them out
    to registered listener callbacks."""

    def __init__(self, options, receiverNode):
        self._encoding = Encoding(options['encoding'])
        # NOTE: attribute name keeps the original (misspelled) public spelling.
        self.adapater = ZMQListener(options['remoteAddress'], self.onMessage,
                                    self._encoding, receiverNode)
        self.messageOriginNodeName = options['messageOriginNodeName']
        self.messageListeners = []

    def registerMessageListener(self, listener):
        self.messageListeners.append(listener)

    def onMessage(self, messageFlowPattern, header, msg):
        """Decode one message, invoke every listener, reply with the duration (ms)."""
        started = time.time()
        payload = self._encoding.decode(header=header, value=msg)
        for callback in self.messageListeners:
            # A failing listener must not prevent the others from running.
            try:
                callback(messageFlowPattern, payload,
                         self.messageOriginNodeName)
            except Exception as e:
                log.error('Error during MessageListener onMessage {e}',
                          e=str(e))

        elapsed = float((time.time() - started) * 1000)
        return self._encoding.encode({'duration': round(elapsed, 4)},
                                     plainEncode=True)

    def fetch(self):
        self.adapater.fetch()

    def close(self, force=True):
        """Close the underlying listener; False when the close itself failed."""
        try:
            return self.adapater.close(force)
        except Exception as e:
            log.error('Exception in adapater.close {e}', e=str(e))
            return False
import os
import pytest
from hkube_python_wrapper.util.encoding import Encoding
from hkube_python_wrapper.storage.storage_manager import StorageManager
from tests.configs import config

# Module-level fixtures shared by the s3 storage tests.
config = config.storage.copy()
config['type'] = 's3'  # force the s3 adapter regardless of the base config

bucket = 'local-hkube'
encoding = Encoding(config['encoding'])

raw = {"data": 'all_my_data'}
(header, payload) = encoding.encode(raw)  # encode() yields a (header, payload) pair
sm = StorageManager(config)


@pytest.fixture(scope="session", autouse=True)
def beforeall():
    # Runs once per session: make sure the test bucket exists before any test.
    sm.storage.adapter.init({'bucket': bucket})


def test_put_get():
    """Round-trip: put the pre-encoded payload, read it back, decode, compare."""
    options = {
        'path': bucket + os.path.sep + 'key1',
        'header': header,
        'data': payload
    }
    sm.storage.put(options)
    (readHeader, readPayload) = sm.storage.get(options)
    decoded = encoding.decode(header=readHeader, value=readPayload)
    assert decoded == raw
Exemplo n.º 5
0
def ensure_dir(dirName):
    """Create *dirName* (including missing parents) if it does not exist.

    Uses ``os.makedirs(..., exist_ok=True)`` instead of the previous
    ``os.path.exists`` check followed by ``makedirs``: the old two-step form
    raced (TOCTOU) when two processes created the directory concurrently.
    """
    os.makedirs(dirName, exist_ok=True)

# Module-level fixtures shared by the filesystem storage tests.
config = config.storage
baseDirectory = config["fs"]["baseDirectory"]
rootDirectory = baseDirectory.split('/')[0]  # top-most dir, removed in teardown
sm = StorageManager(config)
encoding = Encoding(config['encoding'])

dir1 = 'dir1'
dir2 = 'dir2'

raw = {"data": 'all_my_data'}
(header, payload) = encoding.encode({"data": 'all_my_data'})


@pytest.fixture(scope="session", autouse=True)
def beforeall(request):
    """Create the test base directory and remove the whole tree afterwards."""
    ensure_dir(baseDirectory)

    def cleanup():
        shutil.rmtree(rootDirectory, ignore_errors=True)

    request.addfinalizer(cleanup)


def test_put_get_none():
    # NOTE(review): this test looks truncated by the source extraction —
    # `options` is built but never used; presumably a put/get round-trip and
    # an assertion followed. Confirm against the upstream repository.
    (h, p) = encoding.encode(None)
    options = {'path': dir1 + os.path.sep + 'a.txt', 'data': p}
Exemplo n.º 6
0
class DataAdapter:
    """Resolves algorithm input data from (in order) the local storage cache,
    a peer worker's DataServer, or the storage backend, and writes results
    back to storage."""

    def __init__(self, options, dataServer=None):
        # Optional local DataServer: lets _getFromPeer short-circuit the
        # network when the "peer" is this very process.
        self._dataServer = dataServer
        # NOTE(review): reads the module-level `config.storage` here while every
        # other setting comes from `options.storage` — confirm this is intended.
        self._storageCache = Cache(config.storage)
        self._encoding = Encoding(options.storage['encoding'])
        self._storageManager = StorageManager(options.storage)
        self._requestEncoding = options.storage['encoding']
        self._requestTimeout = options.discovery['timeout']
        self._networkTimeout = options.discovery['networkTimeout']
        # Same sizing rule as ThreadPoolExecutor's default: cpus + 4, capped at 32.
        self._maxWorkers = min(32, (multiprocessing.cpu_count() or 1) + 4)
        log.info('using {workers} workers for DataAdapter', workers=self._maxWorkers)

    def encode(self, value):
        return self._encoding.encode(value)

    def decode(self, header=None, value=None):
        return self._encoding.decode(header=header, value=value)

    @trace()
    def getData(self, options):
        """Resolve every '$$<key>' placeholder in flatInput and set the fetched
        value at the matching path inside inputArgs; returns inputArgs."""
        jobId = options.get('jobId')
        inputArgs = options.get('input')
        flatInput = options.get('flatInput')
        storage = options.get('storage')

        if (not flatInput):
            return inputArgs

        for k, v in flatInput.items():
            if self._isStorage(v):
                key = v[2:]  # strip the '$$' marker
                link = storage.get(key, None)
                if (link is None):
                    raise Exception('unable to find storage key')

                # A list link means a batch of references; a dict means one.
                if (typeCheck.isList(link)):
                    data = self.batchRequest(link, jobId)
                else:
                    data = self.tryGetDataFromPeerOrStorage(link)

                setPath(inputArgs, k, data)

        return inputArgs

    def _isStorage(self, value):
        # Storage references are strings prefixed with '$$'.
        return typeCheck.isString(value) and value.startswith('$$')

    def setAlgorithmStorage(self, jobId, input):
        """Persist each input item to storage; returns (storage map keyed by a
        fresh taskId, mapped input of '$$<taskId>' placeholders)."""
        storage = {}
        mappedInput = []
        for item in input:
            taskId = uid(8)
            (header, data) = self.encode(item)
            storageInfo = self.setData({'jobId': jobId, 'taskId': taskId, 'header': header, 'data': data})
            storage[taskId] = {'storageInfo': storageInfo}
            mappedInput.append('$${taskId}'.format(taskId=taskId))
        return (storage, mappedInput)

    @trace()
    def setData(self, options):
        """Write one encoded result to hkube storage; returns the storage info."""
        jobId = options.get('jobId')
        taskId = options.get('taskId')
        header = options.get('header')
        data = options.get('data')
        result = self._storageManager.hkube.put(jobId, taskId, header=header, value=data)
        return result

    @timing
    def batchRequest(self, options, jobId):
        """Fan the per-link requests out over a thread pool and concatenate
        the per-link result lists into one flat response list."""
        batchResponse = []
        for d in options:
            d.update({"jobId": jobId})

        with concurrent.futures.ThreadPoolExecutor(max_workers=self._maxWorkers) as executor:
            for out in executor.map(self._batchRequest, options):
                batchResponse += out

        return batchResponse

    def _batchRequest(self, options):
        """Resolve one batch link: direct storageInfo, else cache, else peer,
        falling back to storage per task when the peer reports an error."""
        batchResponse = []
        jobId = options.get('jobId')
        tasks = options.get('tasks')
        dataPath = options.get('path')
        storageInfo = options.get('storageInfo')
        if (storageInfo):
            storageResult = self._getFromCacheOrStorage(storageInfo, dataPath, storageInfo.get("path"))
            batchResponse.append(storageResult)
            return batchResponse
        tasksNotInCache, batchResponse = self._storageCache.getAll(tasks)
        if (tasksNotInCache):
            options['tasks'] = tasksNotInCache
            results = self._getFromPeer(options)
            # results are ordered like tasksNotInCache, so index i maps back
            # to the task id the peer answered for.
            for i, item in enumerate(results):
                size, content = item
                peerError = self._getPeerError(content)
                taskId = tasksNotInCache[i]
                if (peerError):
                    storageData = self._getDataForTask(jobId, taskId, dataPath)
                    batchResponse.append(storageData)
                else:
                    self._storageCache.update(taskId, content, size)
                    content = self._getPath(content, dataPath)
                    batchResponse.append(content)

        return batchResponse

    def _getDataForTask(self, jobId, taskId, dataPath):
        # Storage fallback for a single task when the peer could not serve it.
        path = self._storageManager.hkube.createPath(jobId, taskId)
        return self._getFromCacheOrStorage({'path': path}, dataPath, taskId)

    def tryGetDataFromPeerOrStorage(self, options):
        """Resolve a single link: cache first, then the discovered peer,
        then storage when the peer failed or no discovery info exists."""
        dataPath = options.get('path')
        storageInfo = options.get('storageInfo')
        discovery = options.get('discovery')
        data = None
        hasResponse = False
        # Cache key is the taskId when a peer is discoverable, otherwise the
        # storage path.
        if (discovery):
            cacheId = options.get('taskId')
        else:
            cacheId = storageInfo.get('path')
        data = self._getFromCache(cacheId, dataPath)
        if not (data):
            if (discovery):
                size, data = self._getFromPeer(options)[0]
                peerError = self._getPeerError(data)
                hasResponse = not peerError
                data = None if peerError else data
                if (hasResponse):
                    self._setToCache(cacheId, data, size)
                    data = self._getPath(data, dataPath)
            if (not hasResponse and storageInfo):
                data = self._getFromCacheOrStorage(storageInfo, dataPath, cacheId)

        return data

    @trace(name='getFromPeer')
    @timing
    def _getFromPeer(self, options):
        """Fetch tasks from the discovered peer; uses the in-process DataServer
        directly when the discovery address is our own."""
        taskId = options.get('taskId')
        tasks = [taskId] if taskId else options.get('tasks')
        discovery = options.get('discovery')
        port = discovery.get('port')
        host = discovery.get('host')

        if (self._dataServer and self._dataServer.isLocal(host, port)):
            dataList = self._dataServer.getDataByTaskId(tasks)
            responses = []
            for header, payload in dataList:
                responses.append((len(payload), self.decode(header=header, value=payload)))
        else:
            request = {
                'address': {
                    'port': port,
                    'host': host
                },
                'tasks': tasks,
                'encoding': self._requestEncoding,
                'timeout': self._requestTimeout,
                'networkTimeout': self._networkTimeout
            }
            dataRequest = DataRequest(request)
            responses = dataRequest.invoke()
        return responses

    def _getPeerError(self, options):
        # Peer errors travel as {'hkube_error': ...} dicts in place of data.
        error = None
        if (typeCheck.isDict(options)):
            error = options.get('hkube_error')

        return error

    def _getFromCacheOrStorage(self, options, dataPath, cacheID):
        """Read from the local cache, falling back to storage (and then
        populating the cache) on a miss."""
        data = self._getFromCache(cacheID, dataPath)
        if (data is None):
            size, data = self._getFromStorage(options)
            self._setToCache(cacheID, data, size)
            data = self._getPath(data, dataPath)

        return data

    @trace(name='getFromCache')
    @timing
    def _getFromCache(self, cacheId, dataPath):
        # Returns None on a cache miss (or when dataPath resolves to nothing).
        data = self._storageCache.get(cacheId)
        data = self._getPath(data, dataPath)
        return data

    def _setToCache(self, cacheId, data, size):
        self._storageCache.update(cacheId, data, size)

    @trace(name='getFromStorage')
    @timing
    def _getFromStorage(self, options):
        """Read and decode one object from storage; returns (size, decoded)."""
        (header, payload) = self._storageManager.storage.get(options)
        decoded = self.decode(header=header, value=payload)
        size = len(payload)
        return (size, decoded)

    def createStorageInfo(self, options):
        """Build the storageInfo descriptor (path, size) plus metadata for a
        task result; does not write anything."""
        jobId = options.get('jobId')
        taskId = options.get('taskId')
        encodedData = options.get('encodedData')

        path = self._storageManager.hkube.createPath(jobId, taskId)
        metadata = self.createMetadata(options)

        storageInfo = {
            'storageInfo': {
                'path': path,
                'size': len(encodedData) if encodedData else 0
            },
            'metadata': metadata
        }
        return storageInfo

    def createMetadata(self, options):
        """Describe the values found at each savePaths entry under nodeName;
        paths that do not resolve are silently skipped."""
        nodeName = options.get('nodeName')
        data = options.get('data')
        savePaths = options.get('savePaths', [])

        metadata = dict()
        objData = dict()
        objData[nodeName] = data
        for path in savePaths:
            try:
                value = getPath(objData, path)
                # getPath signals a miss with the literal string 'DEFAULT'.
                if (value != 'DEFAULT'):
                    meta = self._getMetadata(value)
                    metadata[path] = meta
            except Exception:
                pass

        return metadata

    def _getMetadata(self, value):
        # Minimal type descriptor: object / array (+size) / python type name.
        if (typeCheck.isDict(value)):
            meta = {'type': 'object'}
        elif (typeCheck.isList(value)):
            meta = {'type': 'array', 'size': len(value)}
        else:
            meta = {'type': str(type(value).__name__)}
        return meta

    def _getPath(self, data, dataPath):
        # Extract the sub-value at dataPath; 'DEFAULT' (miss) maps to None.
        if (data and dataPath):
            newData = getPath(data, dataPath)
            if (newData == 'DEFAULT'):
                newData = None
        else:
            newData = data
        return newData
class WebSocketServerClass:
    """Mock websocket server that answers wrapper protocol commands."""

    def __init__(self, encoding, server):
        sm = StorageManager(config.storage)
        storageEncoding = Encoding(config.storage.get('encoding'))
        self._server = server
        self._server.set_fn_new_client(self.handleConnected)
        self._server.set_fn_client_left(self.handleDisconnected)
        self._server.set_fn_message_received(self.handleMessage)
        self._encoding = Encoding(encoding)

        def getAlgorithmResult(request):
            # Echo the input back, or fetch the first storage reference and
            # decode it when the execution used storage input.
            execId = request.get('execId')
            storage = request.get('storage')
            storageInput = request.get('storageInput')
            inputArgs = request.get('input')
            if not storageInput:
                response = inputArgs
            else:
                key = storageInput[0][2:]  # strip the '$$' prefix
                storageInfo = storage.get(key).get('storageInfo')
                (header, encoded) = sm.storage.get(storageInfo)
                decoded = storageEncoding.decode(header=header, value=encoded)
                response = [decoded]
            return {
                'execId': execId,
                'storage': storage,
                'response': response
            }

        # Maps an incoming command to the reply command and its data builder.
        self._commands = {
            "initialized": {
                'command': "start",
                'data': lambda x: x
            },
            "startAlgorithmExecution": {
                'command': "algorithmExecutionDone",
                'data': getAlgorithmResult
            },
            "startStoredSubPipeline": {
                'command': "subPipelineDone",
                'data': lambda x: {
                    'subPipelineId': x.get('subPipelineId'),
                    'response': x.get('subPipeline').get('flowInput')
                }
            }
        }

    def handleMessage(self, client, server, message):
        decoded = self._encoding.decode(value=message, plainEncode=True)
        handler = self._commands.get(decoded["command"])
        if handler:
            reply = {
                "command": handler["command"],
                "data": handler["data"](decoded.get("data", None))
            }
            self.sendMsgToClient(client, reply)

    def handleConnected(self, client, server):
        self.sendMsgToClient(client, {'command': 'initialize', 'data': mockdata.initData})

    def handleDisconnected(self, client, server):
        pass

    def sendMsgToClient(self, client, data):
        self._server.send_message(client, self._encoding.encode(data, plainEncode=True))
Exemplo n.º 8
0
def test_encoding_header_in_payload_object():
    """decode() must split out a header that was prepended to the payload."""
    enc = Encoding('msgpack')
    original = createObject(size, size)
    header, payload = enc.encode(original)
    roundTripped = enc.decode(header=None, value=header + payload)
    assert roundTripped == original
Exemplo n.º 9
0
def test_encoding_no_header_bytes():
    """Byte payloads must round-trip even when the header is discarded."""
    enc = Encoding('msgpack')
    blob = create_bytearray(size)
    _, payload = enc.encode(blob)
    assert enc.decode(header=None, value=payload) == blob
Exemplo n.º 10
0
def test_msgpack_encoding():
    """msgpack encode/decode round-trip with the header supplied."""
    enc = Encoding('msgpack')
    blob = create_bytearray(size)
    header, payload = enc.encode(blob)
    assert enc.decode(header=header, value=payload) == blob
Exemplo n.º 11
0
def test_bson_encoding():
    """bson encode/decode round-trip with the header supplied."""
    enc = Encoding('bson')
    original = createObject(size, size)
    header, payload = enc.encode(original)
    assert enc.decode(header=header, value=payload) == original
Exemplo n.º 12
0
def test_json_encoding():
    """Plain (header-less) json encode/decode round-trip."""
    enc = Encoding('json')
    original = createObjectJson(size)
    wire = enc.encode(original, plainEncode=True)
    assert enc.decode(value=wire, plainEncode=True) == original
class MessageProducer(DaemonThread):
    """Streams messages to consumer nodes through a ZMQ producer and
    periodically publishes per-consumer throughput/duration statistics."""

    def __init__(self, options, consumerNodes, nodeName):
        self.nodeNames = consumerNodes
        port = options['port']
        maxMemorySize = options['messagesMemoryBuff'] * 1024 * 1024  # MiB -> bytes
        encodingType = options['encoding']
        statisticsInterval = options['statisticsInterval']
        self._encoding = Encoding(encodingType)
        self.adapter = ZMQProducer(port,
                                   maxMemorySize,
                                   self.responseAccumulator,
                                   self.queueTimeAccumulator,
                                   consumerTypes=self.nodeNames,
                                   encoding=self._encoding,
                                   nodeName=nodeName)
        # Per-consumer fixed-size histories, drained each statistics cycle.
        self.durationsCache = {}
        self.grossDurationCache = {}
        self.queueTimeCache = {}
        self.responseCount = {}
        self.active = True
        self.printStatistics = 0
        for consumer in consumerNodes:
            self.durationsCache[consumer] = FifoArray(RESPONSE_CACHE)
            self.grossDurationCache[consumer] = FifoArray(RESPONSE_CACHE)
            self.queueTimeCache[consumer] = FifoArray(RESPONSE_CACHE)
            self.responseCount[consumer] = 0
        self.listeners = []

        def sendStatisticsEvery(interval):
            # Publish every `interval` seconds while active, then flush one
            # final snapshot after close() flips self.active off.
            while (self.active):
                self.sendStatistics()
                time.sleep(interval)
            self.sendStatistics()

        # No consumers means nothing to report — skip the statistics thread.
        if (self.nodeNames):
            runThread = Thread(name="Statistics",
                               target=sendStatisticsEvery,
                               args=[statisticsInterval])
            runThread.daemon = True
            runThread.start()
        DaemonThread.__init__(self, "MessageProducer")

    def produce(self, messageFlowPattern, obj):
        """Encode one object and hand it to the ZMQ producer queue."""
        header, encodedMessage = self._encoding.encode(obj)
        self.adapter.produce(header,
                             encodedMessage,
                             messageFlowPattern=messageFlowPattern)

    def responseAccumulator(self, response, consumerType, grossDuration):
        """Record one consumer ack: its reported net duration plus the
        measured gross (round-trip) duration."""
        decodedResponse = self._encoding.decode(value=response,
                                                plainEncode=True)
        duration = decodedResponse['duration']
        self.durationsCache[consumerType].append(float(duration))
        self.grossDurationCache[consumerType].append(grossDuration)
        self.responseCount[consumerType] += 1

    def queueTimeAccumulator(self, consumerType, queueTime):
        self.queueTimeCache[consumerType].append(queueTime)

    def resetDurationsCache(self, consumerType):
        # Drain-and-return: snapshot the history, then clear it.
        durationPerNode = self.durationsCache[consumerType].getAsArray()
        self.durationsCache[consumerType].reset()
        return durationPerNode

    def resetGrossDurationsCache(self, consumerType):
        durationPerNode = self.grossDurationCache[consumerType].getAsArray()
        self.grossDurationCache[consumerType].reset()
        return durationPerNode

    def resetQueueDurationsCache(self, consumerType):
        durationPerNode = self.queueTimeCache[consumerType].getAsArray()
        self.queueTimeCache[consumerType].reset()
        return durationPerNode

    def getResponseCount(self, consumerType):
        return self.responseCount[consumerType]

    def registerStatisticsListener(self, listener):
        self.listeners.append(listener)

    def sendStatistics(self):
        """Collect one statistics snapshot per consumer node and push it to
        every registered listener; logs every 30th snapshot."""
        statistics = []
        for nodeName in self.nodeNames:
            queueSize = self.adapter.queueSize(nodeName)
            sent = self.adapter.sent(nodeName)
            singleNodeStatistics = {
                "nodeName": nodeName,
                "sent": sent,
                "queueSize": queueSize,
                "netDurations": self.resetDurationsCache(nodeName),
                "durations": self.resetGrossDurationsCache(nodeName),
                "queueDurations": self.resetQueueDurationsCache(nodeName),
                "responses": self.getResponseCount(nodeName),
                "dropped": self.adapter.messageQueue.lostMessages[nodeName]
            }
            statistics.append(singleNodeStatistics)
        for listener in self.listeners:
            listener(statistics)
        if (self.printStatistics % 30 == 0):
            log.debug("statistics {stats}", stats=str(statistics))
        self.printStatistics += 1

    def run(self):
        # DaemonThread entry point.
        self.adapter.start()

    def close(self, force=True):
        """Stop the producer; flipping self.active also ends the stats loop."""
        if not (self.active):
            log.warning("Attempting to close inactive MessageProducer")
        else:
            self.adapter.close(force)
            self.active = False