Пример #1
0
def handle_object(obj, result=None):
    """Describe ``obj`` inside ``result`` so it can be sent back to a caller.

    Nothing is written when ``obj`` is ``None``. Otherwise ``result['object']``
    is set to ``True`` and one of three encodings is chosen:

    * Exact ``list`` instances, and objects whose type string contains
      ``'types.Row'`` or ``'pandas.'``, are pickled (protocol 2) and stored
      base64-encoded as text under ``'value'``; ``'class'`` becomes
      ``'pickle'``.
    * Plain functions, and objects whose class string contains ``'pyspark'``
      or ``'py4j'``, are kept in the module-level ``OBJECTS`` registry under a
      freshly generated UUID. The id, dotted path, module and class name are
      recorded so the object can be referenced later.
    * Anything else is stored unchanged under ``'value'``.

    Args:
        obj: The object to describe.
        result: Dict to update in place. A new dict is created when omitted.

    Returns:
        The updated ``result`` dict (the same object that was passed in, if
        one was passed).
    """
    global OBJECTS

    # A literal ``{}`` default is created once and shared by every call that
    # omits ``result``, which leaks keys from one call into the next.
    if result is None:
        result = {}

    if obj is None:
        return result

    obj_str = str(type(obj))
    class_str = str(obj.__class__)

    result['object'] = True
    result['class'] = obj.__class__.__name__

    # if the resulting object can be pickled just send the raw object
    # REFACTOR:
    if 'types.Row' in obj_str or 'pandas.' in obj_str or type(obj) is list:
        logger.info('Pickling object type %s', obj_str)
        result['class'] = 'pickle'
        result['value'] = str(base64.b64encode(pickle.dumps(obj, 2)), 'utf-8')
    elif ('pyspark' in class_str or 'py4j' in class_str
            or isinstance(obj, types.FunctionType)):
        # Keep the live object on this side and hand out only an opaque id.
        object_id = str(uuid.uuid4())
        OBJECTS[object_id] = obj

        result['object_id'] = object_id
        result['path'] = f'{obj.__module__}.{obj.__class__.__name__}'
        result['module'] = obj.__module__
    else:
        result['value'] = obj

    return result
Пример #2
0
def retrieve_object(obj):
    """Swap a proxy placeholder for the object it stands for.

    A placeholder is a plain ``dict`` (exact type, not a subclass) that has a
    ``'_PROXY_ID'`` key; its value is looked up in the module-level
    ``OBJECTS`` registry. Every other value is returned untouched.

    Raises:
        KeyError: If the proxy id is not present in ``OBJECTS``.
    """
    global OBJECTS

    is_proxy = type(obj) == dict and '_PROXY_ID' in obj
    if not is_proxy:
        return obj

    proxy_id = obj['_PROXY_ID']
    logger.info('Retrieving object id: %s' % proxy_id)

    return OBJECTS[proxy_id]
Пример #3
0
def call():
    """Answer a request from the cache, or queue it for the worker.

    Reads the JSON body of the current request and looks its ``'digest'`` up
    in ``RESPONSE_CACHE``. On a hit the stored response is flagged with
    ``'cached': True`` and returned. On a miss the request is put on
    ``REQ_QUEUE`` and a ``{'status': 'pending'}`` payload is returned.

    Returns:
        The ``jsonify``-ed response payload.
    """
    payload = request.json
    digest = payload['digest']

    if digest not in RESPONSE_CACHE:
        pending = {'status': 'pending'}
        REQ_QUEUE.put(payload)
        return jsonify(pending)

    logger.info(f'Returning cached response for {digest}')

    # The stored dict itself is flagged, so the marker persists in the cache.
    cached = RESPONSE_CACHE[digest]
    cached['cached'] = True

    return jsonify(cached)
Пример #4
0
def run(*args, **kwargs):
    """Start the background request worker, then run the web application.

    Keyword Args:
        debug: When absent or equal to ``False``, ``default_handler`` is
            removed from ``app.logger`` and ``app.logger`` is replaced by the
            module ``logger``. The value is still forwarded to ``app.run``.
        auth: Stored on ``app.auth_token``; a missing or empty value is
            stored as ``None``. It is consumed here and not forwarded.
        port: Forwarded to ``app.run``; defaults to ``8765`` when omitted.

    All other positional and keyword arguments are passed to ``app.run``
    unchanged.
    """
    threading.Thread(target=call_worker, daemon=True).start()

    # ``== False`` rather than ``not`` keeps the original semantics: a value
    # such as ``None`` does not count as "debug disabled".
    if kwargs.get('debug', False) == False:  # noqa: E712
        app.logger.removeHandler(default_handler)
        app.logger = logger

        logger.info('Starting pysparkrpc web server')

    # pop() with a default instead of ``del``: ``auth`` is optional, and
    # deleting a missing key raised KeyError after the worker had started.
    auth = kwargs.pop('auth', None)
    app.auth_token = auth if auth != '' else None

    kwargs.setdefault('port', 8765)

    app.run(*args, **kwargs)
Пример #5
0
def _resolve_callable(req):
    """Return the object a request targets, before it is called or read.

    The starting point is either a registered object (``req['object_id']``)
    or the result of walking the dotted ``req['path']``. When
    ``req['function']`` is set, that attribute of the starting point is
    returned; otherwise the starting point itself is returned.
    """
    if req['object_id'] is not None:
        target = OBJECTS[req['object_id']]
    else:
        # have to loop through the module path and getattr
        # one by one because importing the whole module str
        # isn't always valid. For example pyspark.sql.session.SparkSession.Builder
        # NOTE(security): the path comes straight from the request, so any
        # importable top-level module can be reached through this endpoint.
        module_paths = req['path'].split('.')
        target = __import__(module_paths[0])

        for name in module_paths[1:]:
            target = getattr(target, name)

    # if no function is passed, it means the base object needs init
    if req['function'] is not None:
        return getattr(target, req['function'])

    return target


def call_worker():
    """Process queued requests forever and publish their responses.

    For each request taken from ``REQ_QUEUE``:

    * If its digest is already in ``RESPONSE_CACHE``, the stored response is
      flagged ``'cached': True`` and put on ``RESP_QUEUE``.
    * Otherwise the target is resolved, the arguments are parsed, and the
      target is read as a property, indexed as an item, or called, with
      output collected by ``Capture``. The result is described with
      ``handle_object`` and the response is put on ``RESP_QUEUE``. It is
      also stored in ``RESPONSE_CACHE`` when ``req['cache']`` is truthy and
      no exception was recorded.

    Any exception raised while resolving, parsing or invoking is reported as
    text in ``resp['exception']`` instead of terminating the worker.
    """
    global OBJECTS, REQ_QUEUE, RESP_QUEUE, RESPONSE_CACHE, Capture, logger

    while True:
        req = REQ_QUEUE.get()
        digest = req['digest']

        logger.info('Request:')
        logger.info(req)

        if digest in RESPONSE_CACHE:
            logger.info(f'Returning cached response for {digest}')

            resp = RESPONSE_CACHE[digest]
            resp['cached'] = True

            RESP_QUEUE.put(resp)
            # Balance the get() above on this path as well, so the queue's
            # unfinished-task count can reach zero.
            REQ_QUEUE.task_done()
            continue

        res_obj = None

        resp = {
            'status': 'complete',
            'object': False,
            'object_id': None,
            'module': None,
            'class': None,
            'exception': None,
            'stdout': [],
            'cached': False,
            'digest': digest
        }

        try:
            # Resolution and argument parsing sit inside the try so that a
            # bad object id, path or attribute name is reported to the
            # client rather than killing this thread.
            callable_obj = _resolve_callable(req)
            args, kwargs = parse_request_args(req['function'], req['args'], req['kwargs'])

            with Capture() as stdout:
                if req['is_property']:
                    res_obj = callable_obj
                elif req['is_item']:
                    res_obj = callable_obj[req['function']]
                else:
                    res_obj = callable_obj(*args, **kwargs)

                # Only reached when the access/call above did not raise.
                resp['stdout'] = stdout
        except Exception as e:
            resp['exception'] = str(e)

        resp = handle_object(res_obj, resp)

        logger.info('Response:')
        logger.info(resp)

        if req['cache'] and resp['exception'] is None:
            RESPONSE_CACHE[digest] = resp

        RESP_QUEUE.put(resp)
        REQ_QUEUE.task_done()