Exemplo n.º 1
0
def expand_videos(msid, video):
    """Merge the glencoe media metadata for article `msid` into `video`.

    The `video` dict is looked up by its 'id' in the glencoe metadata and
    gains 'image', 'width', 'height' (where available), a 'sources' list and
    a 'placeholder' entry pointing at the thumbnail image. Its 'uri' key is
    removed.

    `video` is modified in place and also returned. `ensure` is expected to
    fail loudly when glencoe has no entry for the video id.
    """
    gc_data = metadata(msid)  # cached on first hit
    known_ids = ", ".join(gc_data.keys())

    video_id = video['id']
    ensure(video_id in gc_data,
           "glencoe doesn't know %r, only %r" % (video_id, known_ids))

    # keep just the thumbnail and dimensions, exposing the thumbnail as 'image'
    extras = utils.subdict(gc_data[video_id], ['jpg_href', 'width', 'height'])
    extras = utils.renkeys(extras, [('jpg_href', 'image')])

    def as_source(mtype):
        "describes a single media source for the given media type key"
        return {
            'mediaType': SOURCES[mtype],
            'uri': gc_data[video_id][mtype + "_href"]
        }

    extras['sources'] = lmap(as_source, SOURCES)
    video.update(extras)

    # 'uri' is returned by elife-tools but is not part of the spec
    del video['uri']

    # the placeholder is the video thumbnail image, referenced by filename only
    video["placeholder"] = placeholder = {}
    placeholder["uri"] = video["image"].split('/')[-1]
    placeholder["alt"] = ""

    return video
Exemplo n.º 2
0
def expand_videos(msid, video):
    """Merge the glencoe media metadata for article `msid` into `video`.

    The `video` dict is looked up by its 'id' in the glencoe metadata and
    gains 'image', 'width', 'height' (where available), an ordered 'sources'
    list and a 'placeholder' entry pointing at the thumbnail image. Its 'uri'
    key is removed.

    `video` is modified in place and also returned. `ensure` is expected to
    fail loudly when glencoe has no media at all for the article, or no entry
    for this particular video id.
    """
    gc_data = metadata(msid)  # cached on first hit
    gc_id_str = ", ".join(gc_data.keys())

    v_id = video['id']
    # the video id must be interpolated here, otherwise the error message
    # contains a literal '%r' and doesn't say which video was missing
    ensure(gc_data, "glencoe doesn't know %r, it doesn't have any media" % (v_id,))
    ensure(v_id in gc_data, "glencoe doesn't know %r, only %r" % (v_id, gc_id_str))

    # keep just the thumbnail and dimensions, exposing the thumbnail as 'image'
    video_data = gc_data[v_id]
    video_data = utils.subdict(video_data, ['jpg_href', 'width', 'height'])
    video_data = utils.renkeys(video_data, [('jpg_href', 'image')])

    def source(mtype):
        "describes a single media source for the given media type key"
        return OrderedDict([
            ('mediaType', SOURCES[mtype]),
            ('uri', gc_data[v_id][mtype + "_href"])
        ])

    video_data['sources'] = lmap(source, SOURCES)
    video.update(video_data)

    del video['uri']  # returned by elife-tools, not part of spec

    # Add placeholder, the video thumbnail image (referenced by filename only)
    video["placeholder"] = OrderedDict()
    video["placeholder"]["uri"] = video["image"].split('/')[-1]
    video["placeholder"]["alt"] = ""

    return video
Exemplo n.º 3
0
def mkresponse(status, message, request={}, **kwargs):
    """Build, log and return a response packet.

    The packet starts with `status`, `message`, empty 'id'/'token' and the
    current time. The 'id', 'token' and 'action' values of `request` are then
    merged in, followed by any `kwargs` overrides. Keys are renamed to their
    response names and anything not present in `conf.RESPONSE_SCHEMA` is
    dropped.

    The packet is logged at a level determined by its status. A falsey
    'message' is removed, and a 'code' of 'already-published' turns the
    status into PUBLISHED.
    """
    response = {
        "status": status,
        "message": message,
        "id": None,
        "token": None,
        "datetime": datetime.now(),
    }

    # values from the originating request replace the defaults above ...
    response.update(subdict(request, ['id', 'token', 'action']))

    # ... and explicit overrides replace everything
    response.update(kwargs)

    # request names -> response names
    renames = [("action", "requested-action"), ("dry-run", "validate-only")]
    response = renkeys(response, renames)

    # only keys supported by the schema survive
    schema_keys = conf.RESPONSE_SCHEMA['properties'].keys()
    response = subdict(response, schema_keys)

    # the log context is the packet with 'message' renamed to 'status-message'
    log_context = renkeys(response, [("message", "status-message")])
    level_for_status = {
        INVALID: logging.ERROR,
        ERROR: logging.ERROR,
        VALIDATED: logging.INFO,
        INGESTED: logging.DEBUG,
        PUBLISHED: logging.DEBUG
    }
    LOG.log(level_for_status[response["status"]],
            "%s response", response['status'], extra=log_context)

    # success messages are None and are not included in the response
    if not response['message']:
        del response['message']

    # double-publications are successful
    already_published = kwargs.get('code') == 'already-published'
    if already_published:
        response['status'] = PUBLISHED

    return response
Exemplo n.º 4
0
def mkresponse(status, message, request={}, **kwargs):
    """Assemble the response for a request, log it, and return it.

    `status` and `message` seed the response together with empty 'id' and
    'token' values and a timestamp. 'id', 'token' and 'action' are copied
    from `request` when given, and `kwargs` are applied last as overrides.
    After renaming keys to their response names, only keys declared in
    `conf.RESPONSE_SCHEMA` are kept.

    A log entry is emitted at a level chosen by status. A falsey 'message'
    is dropped from the result, and a 'code' of 'already-published' is
    reported with a PUBLISHED status.
    """
    resp = {
        "status": status,
        "message": message,
        "id": None,
        "token": None,
        "datetime": datetime.now(),
    }

    request_values = subdict(request, ['id', 'token', 'action'])
    resp.update(request_values)

    # explicit overrides are merged in last
    resp.update(kwargs)

    # rename request-style keys to their response-style equivalents
    resp = renkeys(resp, [
        ("action", "requested-action"),
        ("dry-run", "validate-only"),
    ])

    # discard anything the response schema doesn't know about
    resp = subdict(resp, conf.RESPONSE_SCHEMA['properties'].keys())

    # logging: the message travels in the context as 'status-message'
    context = renkeys(resp, [("message", "status-message")])
    severity = {
        INVALID: logging.ERROR,
        ERROR: logging.ERROR,
        VALIDATED: logging.INFO,
        INGESTED: logging.DEBUG,
        PUBLISHED: logging.DEBUG
    }
    level = severity[resp["status"]]
    LOG.log(level, "%s response", resp['status'], extra=context)

    # success messages are None, so leave them out entirely
    if not resp['message']:
        del resp['message']

    # double-publications are successful
    if kwargs.get('code') == 'already-published':
        resp['status'] = PUBLISHED

    return resp
Exemplo n.º 5
0
def post_xml():
    """Upload JATS xml, generate article-json, validate it, send it to lax as a dry run.

    Reads the uploaded file from `request.files['xml']`, any overrides from
    the 'override' form values and the article 'id' and 'version' from the
    query arguments.

    Returns a `(response, http_status)` pair. Every failure is reported as a
    400 with 'status', 'code', 'message' and 'trace' keys. On success the
    response carries lax's 'status' plus the generated 'ajson' and the
    'override' values used, with a 200.
    """
    http_ensure('xml' in request.files, "xml file required", 400)

    try:
        override = scraper.deserialize_overrides(
            request.form.getlist('override'))
    except ValueError:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message':
            'an error occurred attempting to parse your given overrides.',
            'trace': traceback.format_exc()
        }, 400

    # upload
    try:
        xml = request.files['xml']
        # basename: never trust a client-supplied path
        filename = os.path.basename(xml.filename)
        http_ensure(
            os.path.splitext(filename)[1] == '.xml',
            "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)

    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message':
            'an error occured uploading the article xml to be processed',
            'trace': traceback.format_exc(),
        }, 400  # shouldn't this be a 500? everything is always the client's fault.

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        # close (and so flush) the file before it is read back for validation
        with open(json_path, 'w') as json_file:
            json_file.write(article_json)

    except Exception as err:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': traceback.format_exc()
        }, 400

    # validate
    try:
        if conf.API_PRE_VALIDATE:
            with open(json_path, 'r') as json_file:
                ajson_validate.main(json_file)

    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message':
            'the generated article-json failed validation, see trace for details.',
            'trace': str(err),  # todo: any good?
        }, 400

    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message':
            'an error occurred attempting to validate the generated article-json',
            'trace': traceback.format_exc()
        }, 400  # TODO: shouldn't this be a 500?

    # send to lax
    try:
        msid = request.args['id']
        version = request.args['version']
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'msid': msid,
            'version': int(version),
            'article_json': article_json,
            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp,
                                 ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200

    except Exception:
        # lax returned something indecipherable
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': traceback.format_exc(),
        }, 400  # TODO: shouldn't this be a 500?
Exemplo n.º 6
0
def handler(json_request, outgoing):
    """Handle a single incoming request and send exactly one response to `outgoing`.

    The request is validated against `conf.REQUEST_SCHEMA`. For ingest
    actions the article xml is downloaded from the request's 'location',
    rendered to article data and serialized to article-json. The resulting
    parameters are then passed to lax and lax's answer is sent back.

    Any failure before lax is called is reported as an ERROR response and
    the function returns. A failure while calling lax is reported as an
    ERROR response and then re-raised.
    """
    reply = partial(send_response, outgoing)

    try:
        request = utils.validate(json_request, conf.REQUEST_SCHEMA)
    except ValueError:
        # bad data. who knows what it was. die
        failure = mkresponse(ERROR,
                             "request could not be parsed: %s" % json_request)
        return reply(failure)

    except ValidationError as err:
        # data is readable, but it's in an unknown/invalid format. die
        failure = mkresponse(ERROR,
                             "request was incorrectly formed: %s" % str(err))
        return reply(failure)

    except Exception as err:
        # anything else. die
        failure = mkresponse(
            ERROR,
            "unhandled error attempting to handle request: %s" % str(err))
        return reply(failure)

    # we have a valid request :)
    LOG.info("valid request")

    lax_args = subdict(
        request,
        ['action', 'id', 'token', 'version', 'force', 'validate-only'])
    lax_args = renkeys(lax_args, [('validate-only', 'dry_run')])

    # ingesting (with or without publishing) requires article data, which
    # is downloaded from the location given in the request
    needs_article = lax_args['action'] in (INGEST, INGEST_PUBLISH)
    if needs_article:
        try:
            article_xml = download(request['location'])
            if not article_xml:
                raise ValueError("no article content available")

        except AssertionError as err:
            return reply(mkresponse(
                ERROR, "refusing to download article xml: %s" % str(err),
                request))

        except Exception as err:
            details = (request['location'], str(err))
            return reply(mkresponse(
                ERROR, "failed to download article xml from %r: %s" % details,
                request))

        LOG.info("got xml")

        try:
            article_data = scraper.render_single(
                article_xml,
                version=lax_args['version'],
                location=request['location'])
            LOG.info("rendered article data ")

        except Exception as err:
            if hasattr(err, 'message'):
                error = str(err)
            else:
                error = err
            msg = "failed to render article-json from article-xml: %s" % error
            LOG.exception(msg, extra=lax_args)
            return reply(mkresponse(ERROR, msg, request))

        LOG.info("successful scrape")

        try:
            article_json = utils.json_dumps(article_data)
        except ValueError as err:
            return reply(mkresponse(
                ERROR,
                "failed to serialize article data to article-json: %s" % str(err),
                request))

        LOG.info("successfully serialized article-data to article-json")

        # phew! gauntlet ran, we're now confident of passing this article-json to lax
        # lax may still reject the data as invalid, but we'll proxy that back if necessary
        lax_args['article_json'] = article_json

    try:
        LOG.info("calling lax")

        lax_response = call_lax(**lax_args)

        LOG.info("lax response: %r", lax_response)

        return reply(mkresponse(**lax_response))

    except Exception as err:
        # lax didn't understand us or broke
        reply(mkresponse(
            ERROR,
            "lax failed attempting to handle our request: %s" % str(err),
            request))
        # when lax fails, we fail
        raise
Exemplo n.º 7
0
def post_xml():
    """Upload JATS xml, generate article-json, validate it, send it to lax as a dry run.

    Reads the uploaded file from `request.files['xml']`, any overrides from
    the 'override' form values and the article 'id' and 'version' from the
    query arguments.

    Returns a `(response, http_status)` pair. Every failure is reported as a
    400 with 'status', 'code', 'message' and 'trace' keys. On success the
    response carries lax's 'status' plus the generated 'ajson' and the
    'override' values used, with a 200.
    """
    http_ensure('xml' in request.files, "xml file required", 400)

    try:
        override = scraper.deserialize_overrides(request.form.getlist('override'))
    except ValueError:
        # an explicit 400, like every other failure in this function. a bare
        # dict here would carry no error status for this error response
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message': 'an error occurred attempting to parse your given overrides.',
            'trace': traceback.format_exc()
        }, 400

    # upload
    try:
        xml = request.files['xml']
        # basename: never trust a client-supplied path
        filename = os.path.basename(xml.filename)
        http_ensure(os.path.splitext(filename)[1] == '.xml', "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)

    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message': 'an error occured uploading the article xml to be processed',
            'trace': traceback.format_exc(),
        }, 400 # shouldn't this be a 500?

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        # close (and so flush) the file before it is read back for validation
        with open(json_path, 'w') as json_file:
            json_file.write(article_json)

    except Exception as err:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': traceback.format_exc()
        }, 400

    # validate
    try:
        if conf.API_PRE_VALIDATE:
            with open(json_path, 'r') as json_file:
                ajson_validate.main(json_file)

    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message': 'the generated article-json failed validation, see trace for details.',
            'trace': str(err), # todo: any good?
        }, 400

    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message': 'an error occurred attempting to validate the generated article-json',
            'trace': traceback.format_exc()
        }, 400 # TODO: shouldn't this be a 500?

    # send to lax
    try:
        msid = request.args['id']
        version = request.args['version']
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'msid': msid,
            'version': int(version),
            'article_json': article_json,

            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp, ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200

    except Exception:
        # lax returned something indecipherable
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': traceback.format_exc(),
        }, 400 # TODO: shouldn't this be a 500?
Exemplo n.º 8
0
def handler(json_request, outgoing):
    """Process one incoming request and send exactly one response to `outgoing`.

    The request is validated against `conf.REQUEST_SCHEMA` and reduced to
    the parameters lax is called with ('validate-only' becomes 'dry_run',
    'id' becomes 'msid'). For ingest actions the article xml is downloaded
    from the request's 'location', rendered and serialized to article-json,
    which is added to those parameters.

    Failures before lax is called produce an ERROR response and the function
    returns. A failure while calling lax produces an ERROR response and the
    exception is re-raised.
    """
    send = partial(send_response, outgoing)

    def fail(msg, *original_request):
        "sends an ERROR response, optionally in the context of the original request"
        return send(mkresponse(ERROR, msg, *original_request))

    try:
        request = utils.validate(json_request, conf.REQUEST_SCHEMA)
    except ValueError:
        # bad data. who knows what it was. die
        return fail("request could not be parsed: %s" % json_request)

    except ValidationError as err:
        # data is readable, but it's in an unknown/invalid format. die
        return fail("request was incorrectly formed: %s" % str(err))

    except Exception as err:
        # die
        return fail("unhandled error attempting to handle request: %s" % str(err))

    # we have a valid request :)
    LOG.info("valid request")

    wanted = ['action', 'id', 'token', 'version', 'force', 'validate-only']
    call_args = renkeys(subdict(request, wanted),
                        [('validate-only', 'dry_run'), ('id', 'msid')])

    # if we're to ingest/publish, then we expect a location to download article data
    if call_args['action'] in (INGEST, INGEST_PUBLISH):
        try:
            article_xml = download(request['location'])
            if not article_xml:
                raise ValueError("no article content available")

        except AssertionError as err:
            return fail("refusing to download article xml: %s" % str(err), request)

        except Exception as err:
            return fail(
                "failed to download article xml from %r: %s" % (request['location'], str(err)),
                request)

        LOG.info("got xml")

        try:
            article_data = scraper.render_single(
                article_xml,
                version=call_args['version'],
                location=request['location'])
            LOG.info("rendered article data ")

        except Exception as err:
            if hasattr(err, 'message'):
                error = str(err)
            else:
                error = err
            msg = "failed to render article-json from article-xml: %s" % error
            LOG.exception(msg, extra=call_args)
            return fail(msg, request)

        LOG.info("successful scrape")

        try:
            article_json = utils.json_dumps(article_data)
        except ValueError as err:
            return fail(
                "failed to serialize article data to article-json: %s" % str(err),
                request)

        LOG.info("successfully serialized article-data to article-json")

        # phew! gauntlet ran, we're now confident of passing this article-json to lax
        # lax may still reject the data as invalid, but we'll proxy that back if necessary
        call_args['article_json'] = article_json

    try:
        LOG.info("calling lax")

        lax_response = call_lax(**call_args)

        LOG.info("lax response: %r", lax_response)

        return send(mkresponse(**lax_response))

    except Exception as err:
        # lax didn't understand us or broke
        fail("lax failed attempting to handle our request: %s" % str(err), request)
        # when lax fails, we fail
        raise