def refextract_url():
    """Run refextract on a URL.

    Reads ``url`` from the JSON request body. When the
    ``FEATURE_FLAG_ENABLE_REFEXTRACT_SERVICE`` config flag is truthy, the
    extraction is delegated to the HTTP service configured under
    ``REFEXTRACT_SERVICE_URL``; otherwise ``extract_references_from_url`` is
    called in-process. The extracted references are then deduplicated,
    mapped to the schema and matched.

    Returns:
        A JSON response with the ``matched_references`` entry of the match
        result, or a ``({"message": ...}, 500)`` response when the
        refextract service answers with a non-200 status.
    """
    if current_app.config.get("FEATURE_FLAG_ENABLE_REFEXTRACT_SERVICE"):
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        data = {
            "journal_kb_data": create_journal_dict(),
            "url": request.json["url"],
        }
        response = requests.post(
            f"{current_app.config['REFEXTRACT_SERVICE_URL']}/extract_references_from_url",
            headers=headers,
            data=orjson.dumps(data),
        )
        if response.status_code != 200:
            # The status code must be the second element of the returned
            # tuple. Passing it to jsonify() would serialise it into the
            # body and the client would receive an HTTP 200.
            return jsonify({"message": "Can not extract references"}), 500
        extracted_references = response.json()["extracted_references"]
    else:
        extracted_references = extract_references_from_url(
            request.json["url"],
            override_kbs_files={"journals": create_journal_dict()},
            reference_format="{title},{volume},{page}",
        )

    deduplicated_extracted_references = dedupe_list(extracted_references)
    references = map_refextract_to_schema(deduplicated_extracted_references)
    match_result = match_references(references)
    return jsonify(match_result.get("matched_references"))
def refextract_url():
    """Extract, map and match the references found at a URL.

    The URL is taken from the ``url`` key of the JSON request body and the
    journal knowledge base is supplied by ``create_journal_dict()``.

    Returns:
        A JSON response holding the ``matched_references`` entry of the
        match result.
    """
    source_url = request.json["url"]
    kbs_override = {"journals": create_journal_dict()}
    raw_references = extract_references_from_url(
        source_url,
        override_kbs_files=kbs_override,
        reference_format="{title},{volume},{page}",
    )
    matching = match_references(map_refextract_to_schema(raw_references))
    return jsonify(matching.get("matched_references"))
def refextract_url():
    """Extract, map and match the references found at a URL.

    The URL is taken from the ``url`` key of the JSON request body. The
    knowledge-base path yielded by ``local_refextract_kbs_path()`` is handed
    to the extractor as ``override_kbs_files``.

    Returns:
        A JSON response holding the ``matched_references`` entry of the
        match result.
    """
    # NOTE(review): the context manager is assumed to wrap only the
    # extraction call -- confirm against the original layout.
    with local_refextract_kbs_path() as kbs_path:
        raw_references = extract_references_from_url(
            request.json["url"],
            override_kbs_files=kbs_path,
            reference_format="{title},{volume},{page}",
        )

    matching = match_references(map_refextract_to_schema(raw_references))
    return jsonify(matching.get("matched_references"))
def refextract_text():
    """Extract, map and match the references found in a piece of text.

    The text is taken from the ``text`` key of the JSON request body. The
    knowledge-base path yielded by ``local_refextract_kbs_path()`` is handed
    to the extractor as ``override_kbs_files``.

    Returns:
        A JSON response holding the full result of ``match_references``.
    """
    # NOTE(review): the context manager is assumed to wrap only the
    # extraction call -- confirm against the original layout.
    with local_refextract_kbs_path() as kbs_path:
        raw_references = extract_references_from_string(
            request.json["text"],
            override_kbs_files=kbs_path,
            reference_format="{title},{volume},{page}",
        )

    mapped_references = map_refextract_to_schema(raw_references)
    return jsonify(match_references(mapped_references))
def test_map_refextract_to_schema():
    """Check that one refextract record maps to the expected schema reference."""
    raw_references = [
        {
            "author": [u"F. Englert and R. Brout"],
            "doi": [u"doi:10.1103/PhysRevLett.13.321"],
            "journal_page": [u"321"],
            "journal_reference": [u"Phys. Rev. Lett. 13 (1964) 321"],
            "journal_title": [u"Phys. Rev. Lett."],
            "journal_volume": [u"13"],
            "journal_year": [u"1964"],
            "linemarker": [u"1"],
            "raw_ref": [
                u"[1] F. Englert and R. Brout, \u201cBroken symmetry and the mass of gauge vector mesons\u201d, Phys. Rev. Lett. 13 (1964) 321, doi:10.1103/PhysRevLett.13.321."
            ],
            "texkey": [u"Englert:1964et"],
            "year": [u"1964"],
        }
    ]

    expected = [
        {
            "reference": {
                "authors": [
                    {"full_name": "Englert, F."},
                    {"full_name": "Brout, R."},
                ],
                "dois": ["10.1103/PhysRevLett.13.321"],
                "misc": ["Phys. Rev. Lett. 13 (1964) 321"],
                "label": "1",
                "texkey": "Englert:1964et",
                "publication_info": {"year": 1964},
            },
            "raw_refs": [
                {
                    "schema": "text",
                    "value": "[1] F. Englert and R. Brout, “Broken symmetry and the mass of gauge vector mesons”, Phys. Rev. Lett. 13 (1964) 321, doi:10.1103/PhysRevLett.13.321.",
                }
            ],
        }
    ]

    assert map_refextract_to_schema(raw_references) == expected