def main(doc, args=None):
    """Render `doc` to article-json and return it as an indented JSON string.

    `doc` may be a path or any object exposing a `name` attribute; the msid
    and version are taken from that name via `utils.version_from_path`.
    Entries in `args` are laid over the default render context, which is
    then handed to `render_single` as keyword arguments.

    Any failure is logged with identifying context and re-raised unchanged.
    """
    source_name = getattr(doc, 'name', doc)
    msid, version = utils.version_from_path(source_name)

    ctx = {
        'version': version,
        'override': {},
        'fill-missing-image-dimensions': False,
    }
    ctx.update(args or {})

    def failure_context(extra):
        # context needs to be json serializable, hence str(doc)
        details = {'doc': str(doc), 'msid': msid, 'version': version}
        details.update(extra)
        return details

    try:
        return json.dumps(render_single(doc, **ctx), indent=4)

    except AssertionError:
        # business error: log a single error line, no traceback
        LOG.error("failed to scrape article", extra=failure_context({'override': ctx['override']}))
        raise

    except Exception:
        # unhandled exception: log with traceback and the full render context
        LOG.exception("failed to scrape article", extra=failure_context({'render-ctx': ctx}))
        raise
def main(doc, args=None):
    """Scrape a single article document and return the result as a JSON string.

    The msid and version come from `utils.version_from_path`, applied to
    `doc.name` when `doc` has a `name` attribute and to `doc` itself
    otherwise. The default render context (`version`, empty `override`,
    `fill-missing-image-dimensions` off) is updated with `args` and passed
    to `render_single` as keyword arguments.

    Errors are logged and then re-raised: an `AssertionError` is logged via
    `LOG.error` with the overrides, anything else via `LOG.exception` with
    the whole render context.
    """
    overrides = args or {}
    msid, version = utils.version_from_path(getattr(doc, 'name', doc))

    ctx = {'version': version, 'override': {}, 'fill-missing-image-dimensions': False}
    ctx.update(overrides)

    try:
        article_json = render_single(doc, **ctx)
        rendered = json.dumps(article_json, indent=4)
    except Exception as exc:
        # context needs to be json serializable, so the doc is stringified
        log_ctx = {
            'doc': str(doc),
            'msid': msid,
            'version': version,
        }
        if isinstance(exc, AssertionError):
            # business error
            log_ctx['override'] = ctx['override']
            LOG.error("failed to scrape article", extra=log_ctx)
        else:
            # unhandled exception
            log_ctx['render-ctx'] = ctx
            LOG.exception("failed to scrape article", extra=log_ctx)
        raise

    return rendered
def main(doc, quiet=False):
    """Validate the article-json read from the open file `doc`.

    The POA or VOR schema from `conf` is chosen according to `is_poa`, and
    the `"article"` member of the loaded JSON is validated against it.

    Returns `(True, contents)` on success. On a validation failure the
    error is logged; `(False, contents)` is returned when `quiet` is truthy,
    otherwise the `jsonschema.ValidationError` is re-raised.
    """
    contents = json.load(doc)

    if is_poa(contents):
        schema = conf.POA_SCHEMA
    else:
        schema = conf.VOR_SCHEMA

    filename = os.path.basename(doc.name)
    msid, ver = utils.version_from_path(filename)
    log_context = {
        'json-filename': filename,
        'msid': msid,
        'version': ver,
    }

    try:
        jsonschema.validate(contents["article"], schema)
        LOG.info("validated %s", msid, extra=log_context)
        return True, contents

    except jsonschema.ValidationError as err:
        LOG.error("failed to validate %s: %s", msid, err, extra=log_context)
        if not quiet:
            raise
        return False, contents
def main(doc, quiet=False):
    """Load article-json from the file object `doc` and check it against a schema.

    `is_poa` decides whether `conf.POA_SCHEMA` or `conf.VOR_SCHEMA` applies;
    only `contents["article"]` is validated. The file's base name, msid and
    version are attached to every log record.

    Returns a `(valid, contents)` pair. When validation fails and `quiet`
    is falsy, the `jsonschema.ValidationError` propagates to the caller
    after being logged.
    """
    contents = json.load(doc)
    schema_name = 'POA_SCHEMA' if is_poa(contents) else 'VOR_SCHEMA'
    schema = getattr(conf, schema_name)

    filename = os.path.basename(doc.name)
    msid, ver = utils.version_from_path(filename)
    log_context = {'json-filename': filename, 'msid': msid, 'version': ver}

    valid = True
    try:
        jsonschema.validate(contents["article"], schema)
        LOG.info("validated %s", msid, extra=log_context)
    except jsonschema.ValidationError as err:
        LOG.error("failed to validate %s: %s", msid, err, extra=log_context)
        if not quiet:
            raise
        valid = False

    return valid, contents
def mkreq(path, **overrides):
    """Build and validate an ingest request dict for the article at `path`.

    Paths beginning with `http://` are rejected via `ensure`. Anything that
    is not an `https://` URL is turned into a `file://` location. The msid
    and version are derived from the final location, and `overrides`
    replace any of the default request fields before the request is checked
    against `conf.REQUEST_SCHEMA`.
    """
    ensure(not path.startswith('http://'), "no insecure requests, please")

    # TODO: consider removing this absolute-path expansion
    looks_like_repo_path = path.lstrip('/').startswith('article-xml/articles/')
    if looks_like_repo_path:
        path = os.path.abspath(path)

    if not path.startswith('https://'):
        path = 'file://' + path

    msid, ver = utils.version_from_path(path)

    req = {}
    req['action'] = conf.INGEST
    req['location'] = path
    req['id'] = msid
    req['version'] = ver
    req['force'] = True  # TODO: shouldn't this be False?
    req['validate-only'] = False
    req['token'] = 'pants-party'
    req.update(overrides)

    # don't ever generate an invalid request
    utils.validate(req, conf.REQUEST_SCHEMA)
    return req
def post_xml():
    """Upload JATS XML, generate article-json, validate it, send it to lax as a dry run.

    Reads the uploaded file from `request.files['xml']` and any overrides
    from the `override` form values. The work proceeds in stages; each stage
    that fails returns early with an error payload:

    1. parse overrides    -> `conf.BAD_OVERRIDES`
    2. save the upload    -> `conf.BAD_UPLOAD`
    3. scrape to json     -> `conf.BAD_SCRAPE`
    4. validate the json  -> `conf.ERROR_INVALID` / `conf.ERROR_VALIDATING`
    5. dry-run lax ingest -> lax's own response, or `conf.ERROR_COMMUNICATING`

    Returns a `(payload, http_status)` pair: status 200 with the generated
    article-json and the overrides on success, 400 with `status`, `code`,
    `message` and `trace` on any failure. A missing `xml` upload is handed to
    `http_ensure` with a 404.

    Changes from the previous implementation:
    - the bad-overrides branch now returns 400 like every other error branch
      (it previously returned the error payload with no status code);
    - the generated json file is written and read through `with` blocks so
      the file handles are closed deterministically;
    - directory components are stripped from the client-supplied filename
      before it is joined to the upload folder.
    """
    http_ensure('xml' in request.files, "xml file required", 404)

    try:
        override = scraper.deserialize_overrides(request.form.getlist('override'))
    except ValueError:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message': 'an error occurred attempting to parse your given overrides.',
            'trace': traceback.format_exc(),
        }, 400

    # upload
    try:
        xml = request.files['xml']
        # the filename is client-controlled: keep only the final path component so
        # the upload cannot be written outside of the upload folder.
        # still assumes a name like 'elife-00000-v1.xml'
        filename = os.path.basename(xml.filename)
        http_ensure(os.path.splitext(filename)[1] == '.xml', "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)
    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message': 'an error occured uploading the article xml to be processed',
            'trace': traceback.format_exc(),
        }, 400  # everything is always the client's fault.

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        with open(json_path, 'w') as fh:
            fh.write(article_json)
    except Exception as err:
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': traceback.format_exc(),
        }, 400

    # validate
    try:
        with open(json_path, 'r') as fh:
            ajson_validate.main(fh)
    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message': 'the generated article-json failed validation, see trace for details.',
            'trace': str(err),  # todo: any good?
        }, 400
    except Exception:
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message': 'an error occurred attempting to validate the generated article-json',
            'trace': traceback.format_exc(),
        }, 400

    # send to lax
    try:
        msid, version = utils.version_from_path(filename)
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'id': msid,
            'version': int(version),
            'article_json': article_json,

            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        # 'message' is renamed for the log record's extra context
        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp, ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200

    except Exception:
        # lax returned something indecipherable
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': traceback.format_exc(),
        }, 400