def citekey_to_csl_item(citekey, prune=True):
    """
    Generate a CSL Item (Python dictionary) for the input citekey.

    The citekey is first passed through ``standardize_citekey`` and the
    standardized value is used for everything that follows: it is split on
    its first colon into a source prefix and an identifier, the retriever
    registered for that source in ``citeproc_retrievers`` is imported and
    called with the identifier, and the result is wrapped in ``CSL_Item``.

    A provenance sentence and a ``standard_id`` entry are appended to the
    item's note, its id is set to the shortened citekey, and the item is
    cleaned before being returned.

    Parameters:
        citekey: citation key of the form ``source:identifier``.
        prune: forwarded to ``CSL_Item.clean``.

    Raises:
        ValueError: if the source prefix has no entry in
            ``citeproc_retrievers`` (also raised by the tuple unpacking
            when the standardized citekey contains no colon).
    """
    from manubot.cite.csl_item import CSL_Item
    from manubot import __version__ as manubot_version

    # Rebind citekey to the standardized form. This used to be written as a
    # `==` comparison, which discarded the result and left the raw citekey
    # in use for the source lookup, the note and the id.
    citekey = standardize_citekey(citekey, warn_if_changed=True)
    source, identifier = citekey.split(':', 1)
    if source not in citeproc_retrievers:
        msg = f'Unsupported citation source {source!r} in {citekey!r}'
        raise ValueError(msg)
    citeproc_retriever = import_function(citeproc_retrievers[source])
    csl_item = citeproc_retriever(identifier)
    csl_item = CSL_Item(csl_item)

    # Record how the item was produced and which standardized citekey it
    # corresponds to.
    note_text = f'This CSL JSON Item was automatically generated by Manubot v{manubot_version} using citation-by-identifier.'
    note_dict = {
        'standard_id': citekey,
    }
    csl_item.note_append_text(note_text)
    csl_item.note_append_dict(note_dict)

    short_citekey = shorten_citekey(citekey)
    csl_item.set_id(short_citekey)
    csl_item.clean(prune=prune)
    return csl_item
def test_csl_item_standardize_id_note():
    """
    standardize_id should pick up standard_id from the note and record the
    original identifiers as additional note fields.
    """
    record = {
        'id': 'original-id',
        'type': 'article-journal',
        'note': 'standard_id: doi:10.1371/journal.PPAT.1006256',
    }
    csl_item = CSL_Item(record)
    csl_item.standardize_id()

    # The id is expected in lowercased DOI form.
    assert csl_item['id'] == 'doi:10.1371/journal.ppat.1006256'

    parsed_note = csl_item.note_dict
    assert parsed_note['original_id'] == 'original-id'
    expected_original = 'doi:10.1371/journal.PPAT.1006256'
    assert parsed_note['original_standard_id'] == expected_original
def test_cite_command_preserves_order():
    """
    Regression test for https://github.com/manubot/manubot/issues/240

    Runs ``manubot cite`` on citekeys that include repeats spelled
    differently and checks the standard_id of each returned item, in order.
    """
    requested = [
        "pmid:29618526",
        "doi:10.7717/peerj.338",
        "arxiv:1806.05726v1",
        "pubmed:29618526",
        "DOI:10.7717/PEERJ.338",
    ]
    command = ["manubot", "cite", "--bibliography=input-references.json"]
    command.extend(requested)
    stdout = subprocess.check_output(command, encoding="utf-8", cwd=data_dir)

    observed = []
    for record in json.loads(stdout):
        observed.append(CSL_Item(record).note_dict.get("standard_id"))

    # Only three distinct items are expected, in first-seen order.
    expected = [
        "pubmed:29618526",
        "doi:10.7717/peerj.338",
        "arxiv:1806.05726v1",
    ]
    assert observed == expected
def load_bibliography(path: str) -> list:
    """
    Load a bibliography and return it as a list of CSL_Item objects.

    Paths ending in .json or .yaml are read with read_serialized_data
    (presumably also handling URLs -- confirm against that helper); a read
    failure is logged and treated as an empty bibliography. Any other path
    is handed to manubot.pandoc.bibliography.load_bibliography.

    If the loaded value is not a list, an error is logged and an empty list
    is used instead.
    """
    if pathlib.Path(path).suffix not in {".json", ".yaml"}:
        # Not a serialized CSL file: let the pandoc-based loader convert it.
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc,
        )

        csl_items = load_bibliography_pandoc(path)
    else:
        try:
            csl_items = read_serialized_data(path)
        except Exception as error:
            logging.error(f"load_bibliography: error reading {path!r}.\n{error}")
            logging.info("load_bibliography exception info", exc_info=True)
            csl_items = []

    if not isinstance(csl_items, list):
        logging.error(
            f"process.load_bibliography: csl_items read from {path} are of type {type(csl_items)}. "
            "Setting csl_items to an empty list."
        )
        csl_items = []

    from manubot.cite.csl_item import CSL_Item

    return [CSL_Item(entry) for entry in csl_items]
def load_bibliography(path) -> list:
    """
    Load a bibliography and return it as a list of CSL_Item objects.

    Files with a .json or .yaml suffix are read with read_serialized_data;
    if that raises, the exception is logged and an empty list is used.
    Every other file is passed to
    manubot.pandoc.bibliography.load_bibliography for conversion.

    A loaded value that is not a list is logged as an error and replaced by
    an empty list.
    """
    path = pathlib.Path(path)
    is_serialized = path.suffix in {".json", ".yaml"}

    if is_serialized:
        try:
            csl_items = read_serialized_data(path)
        except Exception:
            logging.exception(
                f"process.load_bibliography: error parsing {path}.\n")
            csl_items = []
    else:
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc,
        )

        csl_items = load_bibliography_pandoc(path)

    if not isinstance(csl_items, list):
        logging.error(
            f"process.load_bibliography: csl_items read from {path} are of type {type(csl_items)}. "
            "Setting csl_items to an empty list.")
        csl_items = []

    from manubot.cite.csl_item import CSL_Item

    wrapped = []
    for entry in csl_items:
        wrapped.append(CSL_Item(entry))
    return wrapped
def load_bibliography(path) -> list:
    """
    Load a bibliography and return it as a list of CSL_Item objects.

    .json files are parsed with json.load and .yaml files with
    yaml.safe_load, both opened as utf-8-sig. Any exception while reading
    either kind is logged and yields an empty list. Files with any other
    suffix are passed to manubot.pandoc.bibliography.load_bibliography.

    A loaded value that is not a list is logged as an error and replaced by
    an empty list.
    """
    path = pathlib.Path(path)
    suffix = path.suffix
    # Only files that are not parsed directly go through pandoc-citeproc.
    use_pandoc_citeproc = suffix not in ('.json', '.yaml')

    try:
        if suffix == '.json':
            with path.open(encoding='utf-8-sig') as read_file:
                csl_items = json.load(read_file)
        elif suffix == '.yaml':
            # Imported here so a missing yaml package is logged like any
            # other read failure.
            import yaml
            with path.open(encoding='utf-8-sig') as read_file:
                csl_items = yaml.safe_load(read_file)
    except Exception:
        logging.exception(
            f'process.load_bibliography: error parsing {path}.\n')
        csl_items = []

    if use_pandoc_citeproc:
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc,
        )
        csl_items = load_bibliography_pandoc(path)

    if not isinstance(csl_items, list):
        logging.error(
            f'process.load_bibliography: csl_items read from {path} are of type {type(csl_items)}. '
            'Setting csl_items to an empty list.')
        csl_items = []

    from manubot.cite.csl_item import CSL_Item

    return [CSL_Item(entry) for entry in csl_items]
def test_csl_item_standardize_id_note():
    """
    Verify that standardize_id takes standard_id from the note and stores
    the original id values as extra note fields.
    """
    csl_item = CSL_Item(
        {
            "id": "original-id",
            "type": "article-journal",
            "note": "standard_id: doi:10.1371/journal.PPAT.1006256",
        }
    )
    csl_item.standardize_id()

    new_id = csl_item["id"]
    assert new_id == "doi:10.1371/journal.ppat.1006256"

    fields = csl_item.note_dict
    assert fields["original_id"] == "original-id"
    assert fields["original_standard_id"] == "doi:10.1371/journal.PPAT.1006256"
def test_csl_item_note_append(input_note, text, dictionary, expected_note):
    """
    Append text and then a dictionary to an item's note and compare the
    resulting note with the expected string.
    """
    fields = {
        'id': 'test_csl_item',
        'type': 'entry',
        'note': input_note,
    }
    item = CSL_Item(fields)
    item.note_append_text(text)
    item.note_append_dict(dictionary)
    resulting_note = item.note
    assert resulting_note == expected_note
def populate_metadata_from_manubot_url(url, metadata):
    """
    Fill `metadata` with fields taken from the CSL item fetched for `url`.

    Returns a pair: (metadata, True) after `metadata` has been updated in
    place, or (None, False) if anything in the lookup or extraction raised
    (the exception is printed and `metadata` is left untouched).
    """
    try:
        csl_item = get_csl_item(url)
        if not isinstance(csl_item, CSL_Item):
            csl_item = CSL_Item(csl_item)

        # TODO: We can use this if we want to reject uploads that don't match
        # certain content types
        #
        # if csl_item['type'] not in MANUBOT_PAPER_TYPES:
        #     return None

        doi = csl_item["DOI"].lower() if "DOI" in csl_item else None
        issued = csl_item.get_date("issued", fill=True)

        # Everything is collected first so a failure part-way through
        # leaves the caller's metadata unchanged.
        extracted = {
            "abstract": csl_item.get("abstract", None),
            "doi": doi,
            "is_public": True,
            "paper_title": csl_item.get("title", None),
            "csl_item": csl_item,
            "paper_publish_date": issued,
            "raw_authors": get_raw_authors_from_csl_item(csl_item),
        }
        metadata.update(extracted)
    except Exception as e:
        print(e)
        return None, False
    return metadata, True
def test_csl_item_note_append(input_note, text, dictionary, expected_note):
    """
    The note after appending `text` and `dictionary` (in that order) must
    equal `expected_note`.
    """
    csl_item = CSL_Item(
        {"id": "test_csl_item", "type": "entry", "note": input_note}
    )
    # Text goes in first, then the dictionary.
    csl_item.note_append_text(text)
    csl_item.note_append_dict(dictionary)
    actual_note = csl_item.note
    assert actual_note == expected_note
def test_correct_invalid_type(self):
    """correct_invalid_type should turn "journal-article" into "article-journal"."""
    corrected = CSL_Item(type="journal-article").correct_invalid_type()
    expected = {"type": "article-journal"}
    assert corrected == expected
def test_constructor_leaves_no_inplace_effects(self):
    """Building a CSL_Item from a dict plus keywords must not modify that dict."""
    source = {"a": 1}
    item = CSL_Item(source, b=2)
    assert item == {"a": 1, "b": 2}
    # The keyword argument must not have leaked into the input dict.
    assert source == {"a": 1}
def test_recursive_constructor(self):
    """A CSL_Item built from another CSL_Item carries the same contents."""
    from_empty = CSL_Item(CSL_Item())
    assert from_empty == {}

    from_keyword = CSL_Item(CSL_Item(abc=1))
    assert from_keyword == {"abc": 1}
def test_csl_item_note_dict(note, dictionary):
    """note_dict of an item built with `note` must equal `dictionary`."""
    item = CSL_Item(note=note)
    parsed = item.note_dict
    assert parsed == dictionary
def test_constuctor_by_dict_keyword_combination(self):
    """A positional dict and keyword arguments are combined into one item."""
    item = CSL_Item({"title": "My journal article"}, type="journal-article")
    expected = {
        "title": "My journal article",
        "type": "journal-article",
    }
    assert item == expected
def test_constuctor_by_keyword(self):
    """A keyword argument becomes a key of the constructed item."""
    item = CSL_Item(type='journal-article')
    expected = {'type': 'journal-article'}
    assert item == expected
def test_clean_set_id(self):
    """After set_id and a pruning clean, the item holds only type and id."""
    item = CSL_Item(type='chapter')
    item.set_id('abc')
    item.clean(prune=True)
    expected = {'type': 'chapter', 'id': 'abc'}
    assert item == expected
def test_constuctor_empty(self):
    """A CSL_Item built with no arguments compares equal to an empty dict."""
    item = CSL_Item()
    assert item == {}
def test_constuctor_by_dict_keyword_combination(self):
    """Keys from the positional dict and from keywords both end up in the item."""
    positional = {'title': 'My journal article'}
    combined = CSL_Item(positional, type='journal-article')
    assert combined == {
        'title': 'My journal article',
        'type': 'journal-article',
    }
def test_clean_set_id(self):
    """set_id followed by clean(prune=True) leaves exactly type and id."""
    chapter = CSL_Item(type="chapter")
    chapter.set_id("abc")
    chapter.clean(prune=True)
    assert chapter == {
        "type": "chapter",
        "id": "abc",
    }
def test_correct_invalid_type(self):
    """The type 'journal-article' is expected to be corrected to 'article-journal'."""
    item = CSL_Item(type='journal-article')
    result = item.correct_invalid_type()
    assert result == {'type': 'article-journal'}
def test_set_default_type(self):
    """An item without a type is expected to get the type 'entry'."""
    result = CSL_Item().set_default_type()
    expected = {'type': 'entry'}
    assert result == expected
def test_set_default_type(self):
    """set_default_type on an empty item should yield type "entry"."""
    empty_item = CSL_Item()
    defaulted = empty_item.set_default_type()
    assert defaulted == {"type": "entry"}
def test_constuctor_by_keyword(self):
    """Constructing with a keyword yields an item holding just that key."""
    built = CSL_Item(type="journal-article")
    assert built == {
        "type": "journal-article",
    }
def test_no_change_of_type(self):
    """Neither correct_invalid_type nor set_default_type should alter type "book"."""
    corrected = CSL_Item(type="book").correct_invalid_type()
    assert corrected == {"type": "book"}

    defaulted = CSL_Item(type="book").set_default_type()
    assert defaulted == {"type": "book"}
def test_no_change_of_type(self):
    """An item of type 'book' is expected to come back unchanged from both methods."""
    after_correction = CSL_Item(type='book').correct_invalid_type()
    assert after_correction == {'type': 'book'}

    after_default = CSL_Item(type='book').set_default_type()
    assert after_default == {'type': 'book'}
def test_assert_csl_item_type_passes():
    """assert_csl_item_type should accept a CSL_Item instance without raising."""
    item = CSL_Item()
    assert_csl_item_type(item)
def test_csl_item_standardize_id_repeated():
    """
    Calling standardize_id twice on the same item must give equal results.

    Also checks that the first call did not add a "standard_citation" key
    to the item itself.
    """
    csl_item = CSL_Item(id="pmid:1", type="article-journal")
    csl_item_1 = copy.deepcopy(csl_item.standardize_id())
    # Check the item, not the string literal "csl_item": the previous form
    # was a substring test that could never fail.
    assert "standard_citation" not in csl_item
    csl_item_2 = copy.deepcopy(csl_item.standardize_id())
    assert csl_item_1 == csl_item_2
def test_csl_item_standardize_id(csl_item, standard_citation):
    """
    standardize_id must return the same object it was called on, with its
    id equal to `standard_citation`.
    """
    item = CSL_Item(csl_item)
    returned = item.standardize_id()
    assert returned is item
    assert returned["id"] == standard_citation
def test_constuctor_by_dict(self):
    """A CSL_Item built from a dict compares equal to that dict."""
    source = {"title": "My book"}
    item = CSL_Item(source)
    assert item == source