示例#1
0
def citekey_to_csl_item(citekey, prune=True):
    """
    Generate a CSL Item (Python dictionary) for the input citekey.

    The citekey is standardized, split into ``source`` and ``identifier``
    at the first colon, and handed to the retriever registered for that
    source in ``citeproc_retrievers``. The retrieved data is wrapped in a
    CSL_Item, annotated with a generation note and the standard citekey,
    given the shortened citekey as its id, and cleaned.

    Parameters:
        citekey: citation key of the form ``source:identifier``.
        prune: forwarded to ``CSL_Item.clean``.

    Returns:
        The populated CSL_Item.

    Raises:
        ValueError: if the citekey's source has no registered retriever.
    """
    from manubot.cite.csl_item import CSL_Item
    from manubot import __version__ as manubot_version

    # Assign the standardized citekey back. A comparison (`==`) here would
    # evaluate and discard the result, leaving the raw citekey in use.
    citekey = standardize_citekey(citekey, warn_if_changed=True)
    source, identifier = citekey.split(':', 1)

    if source not in citeproc_retrievers:
        msg = f'Unsupported citation source {source!r} in {citekey!r}'
        raise ValueError(msg)
    citeproc_retriever = import_function(citeproc_retrievers[source])
    csl_item = citeproc_retriever(identifier)
    csl_item = CSL_Item(csl_item)

    # Record provenance in the CSL note field.
    note_text = f'This CSL JSON Item was automatically generated by Manubot v{manubot_version} using citation-by-identifier.'
    note_dict = {
        'standard_id': citekey,
    }
    csl_item.note_append_text(note_text)
    csl_item.note_append_dict(note_dict)

    short_citekey = shorten_citekey(citekey)
    csl_item.set_id(short_citekey)
    csl_item.clean(prune=prune)

    return csl_item
示例#2
0
def test_csl_item_standardize_id_note():
    """
    standardize_id() should take the standard_id found in the note,
    use its standardized form as the new id, and record the original id
    and original standard_id as note fields.
    """
    fields = {
        'id': 'original-id',
        'type': 'article-journal',
        'note': 'standard_id: doi:10.1371/journal.PPAT.1006256',
    }
    item = CSL_Item(fields)
    item.standardize_id()
    assert item['id'] == 'doi:10.1371/journal.ppat.1006256'
    parsed_note = item.note_dict
    assert parsed_note['original_id'] == 'original-id'
    assert parsed_note['original_standard_id'] == 'doi:10.1371/journal.PPAT.1006256'
示例#3
0
def test_cite_command_preserves_order():
    """
    Run ``manubot cite`` on citekeys that include repeats under different
    spellings and check the standard_id of each returned CSL Item, in order.

    https://github.com/manubot/manubot/issues/240
    """
    citekeys = [
        "pmid:29618526",
        "doi:10.7717/peerj.338",
        "arxiv:1806.05726v1",
        "pubmed:29618526",
        "DOI:10.7717/PEERJ.338",
    ]
    command = ["manubot", "cite", "--bibliography=input-references.json"]
    command.extend(citekeys)
    stdout = subprocess.check_output(command, encoding="utf-8", cwd=data_dir)
    standard_ids = [
        CSL_Item(record).note_dict.get("standard_id")
        for record in json.loads(stdout)
    ]
    expected = [
        "pubmed:29618526",
        "doi:10.7717/peerj.338",
        "arxiv:1806.05726v1",
    ]
    assert standard_ids == expected
示例#4
0
def load_bibliography(path: str) -> list:
    """
    Read the bibliography at ``path`` and return it as a list of CSL_Item objects.

    Paths ending in .json or .yaml are read with read_serialized_data
    (URLs supported); a failure there is logged and yields an empty list.
    Any other path is delegated to the pandoc bibliography loader
    (URLs not supported). A result that is not a list is logged as an
    error and replaced with an empty list.
    """
    suffix = pathlib.Path(path).suffix
    if suffix not in {".json", ".yaml"}:
        # Not already CSL data: let pandoc-citeproc do the conversion.
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc,
        )

        loaded = load_bibliography_pandoc(path)
    else:
        try:
            loaded = read_serialized_data(path)
        except Exception as error:
            logging.error(f"load_bibliography: error reading {path!r}.\n{error}")
            logging.info("load_bibliography exception info", exc_info=True)
            loaded = []

    if not isinstance(loaded, list):
        logging.error(
            f"process.load_bibliography: csl_items read from {path} are of type {type(loaded)}. "
            "Setting csl_items to an empty list."
        )
        loaded = []

    from manubot.cite.csl_item import CSL_Item

    return list(map(CSL_Item, loaded))
示例#5
0
def load_bibliography(path) -> list:
    """
    Read the bibliography at ``path`` and return it as a list of CSL_Item objects.

    Files with a .json or .yaml suffix are parsed with read_serialized_data;
    a parsing failure is logged with its traceback and yields an empty list.
    Every other file is converted by the pandoc bibliography loader.
    A result that is not a list is logged as an error and replaced with
    an empty list.
    """
    bib_path = pathlib.Path(path)
    if bib_path.suffix not in {".json", ".yaml"}:
        # Not already CSL data: let pandoc-citeproc do the conversion.
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc, )

        loaded = load_bibliography_pandoc(bib_path)
    else:
        try:
            loaded = read_serialized_data(bib_path)
        except Exception:
            logging.exception(
                f"process.load_bibliography: error parsing {bib_path}.\n")
            loaded = []

    if not isinstance(loaded, list):
        logging.error(
            f"process.load_bibliography: csl_items read from {bib_path} are of type {type(loaded)}. "
            "Setting csl_items to an empty list.")
        loaded = []

    from manubot.cite.csl_item import CSL_Item

    return list(map(CSL_Item, loaded))
示例#6
0
def load_bibliography(path) -> list:
    """
    Read the bibliography at ``path`` and return it as a list of CSL_Item objects.

    .json files are parsed with json.load and .yaml files with
    yaml.safe_load, both opened as 'utf-8-sig'. A failure while parsing is
    logged with its traceback and yields an empty list. Files with any
    other suffix are converted by the pandoc bibliography loader.
    A result that is not a list is logged as an error and replaced with
    an empty list.
    """
    bib_path = pathlib.Path(path)
    suffix = bib_path.suffix
    # Only files that are not already CSL JSON/YAML go through pandoc.
    parse_directly = suffix in ('.json', '.yaml')
    try:
        if suffix == '.json':
            with bib_path.open(encoding='utf-8-sig') as handle:
                loaded = json.load(handle)
        elif suffix == '.yaml':
            import yaml
            with bib_path.open(encoding='utf-8-sig') as handle:
                loaded = yaml.safe_load(handle)
    except Exception:
        logging.exception(
            f'process.load_bibliography: error parsing {bib_path}.\n')
        loaded = []
    if not parse_directly:
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc, )
        loaded = load_bibliography_pandoc(bib_path)
    if not isinstance(loaded, list):
        logging.error(
            f'process.load_bibliography: csl_items read from {bib_path} are of type {type(loaded)}. '
            'Setting csl_items to an empty list.')
        loaded = []
    from manubot.cite.csl_item import CSL_Item
    return list(map(CSL_Item, loaded))
示例#7
0
def test_csl_item_standardize_id_note():
    """
    standardize_id() should take the standard_id found in the note,
    use its standardized form as the new id, and record the original id
    and original standard_id as note fields.
    """
    fields = {
        "id": "original-id",
        "type": "article-journal",
        "note": "standard_id: doi:10.1371/journal.PPAT.1006256",
    }
    item = CSL_Item(fields)
    item.standardize_id()
    assert item["id"] == "doi:10.1371/journal.ppat.1006256"
    parsed_note = item.note_dict
    assert parsed_note["original_id"] == "original-id"
    original_standard_id = parsed_note["original_standard_id"]
    assert original_standard_id == "doi:10.1371/journal.PPAT.1006256"
示例#8
0
def test_csl_item_note_append(input_note, text, dictionary, expected_note):
    """
    Appending text and then a dictionary to a CSL Item note should
    produce ``expected_note``.
    """
    fields = {
        'id': 'test_csl_item',
        'type': 'entry',
        'note': input_note,
    }
    item = CSL_Item(fields)
    item.note_append_text(text)
    item.note_append_dict(dictionary)
    resulting_note = item.note
    assert resulting_note == expected_note
示例#9
0
def populate_metadata_from_manubot_url(url, metadata):
    """
    Fill ``metadata`` with fields taken from the CSL item retrieved for ``url``.

    Returns a ``(metadata, True)`` tuple when the update succeeds. If any
    exception is raised along the way, it is printed and ``(None, False)``
    is returned instead.
    """
    try:
        item = get_csl_item(url)
        if not isinstance(item, CSL_Item):
            item = CSL_Item(item)

        # TODO: We can use this if we want to reject uploads that don't match
        # certain content types
        #
        # if item['type'] not in MANUBOT_PAPER_TYPES:
        #     return None

        # DOIs are stored lower-cased; absent DOIs stay None.
        doi = item["DOI"].lower() if "DOI" in item else None
        publish_date = item.get_date("issued", fill=True)

        metadata.update(
            {
                "abstract": item.get("abstract", None),
                "doi": doi,
                "is_public": True,
                "paper_title": item.get("title", None),
                "csl_item": item,
                "paper_publish_date": publish_date,
                "raw_authors": get_raw_authors_from_csl_item(item),
            }
        )
        return metadata, True
    except Exception as e:
        print(e)
        return None, False
示例#10
0
def test_csl_item_note_append(input_note, text, dictionary, expected_note):
    """
    Appending text and then a dictionary to a CSL Item note should
    produce ``expected_note``.
    """
    fields = {
        "id": "test_csl_item",
        "type": "entry",
        "note": input_note,
    }
    item = CSL_Item(fields)
    item.note_append_text(text)
    item.note_append_dict(dictionary)
    resulting_note = item.note
    assert resulting_note == expected_note
示例#11
0
 def test_correct_invalid_type(self):
     """The type journal-article should be corrected to article-journal."""
     item = CSL_Item(type="journal-article")
     corrected = item.correct_invalid_type()
     assert corrected == {"type": "article-journal"}
示例#12
0
 def test_constructor_leaves_no_inplace_effects(self):
     """Constructing a CSL_Item must not modify the dict passed to it."""
     source = {"a": 1}
     item = CSL_Item(source, b=2)
     merged = {"a": 1, "b": 2}
     assert item == merged
     untouched = {"a": 1}
     assert source == untouched
示例#13
0
 def test_recursive_constructor(self):
     """A CSL_Item built from another CSL_Item should hold the same content."""
     from_empty = CSL_Item(CSL_Item())
     assert from_empty == {}
     from_populated = CSL_Item(CSL_Item(abc=1))
     assert from_populated == {"abc": 1}
示例#14
0
def test_csl_item_note_dict(note, dictionary):
    """The note_dict of a CSL_Item built with ``note`` should equal ``dictionary``."""
    parsed = CSL_Item(note=note).note_dict
    assert parsed == dictionary
示例#15
0
 def test_constuctor_by_dict_keyword_combination(self):
     """A positional dict and keyword arguments should be merged by the constructor."""
     item = CSL_Item({"title": "My journal article"}, type="journal-article")
     expected = {
         "title": "My journal article",
         "type": "journal-article",
     }
     assert item == expected
示例#16
0
 def test_constuctor_by_keyword(self):
     """Keyword arguments alone should populate the CSL_Item."""
     item = CSL_Item(type='journal-article')
     expected = {'type': 'journal-article'}
     assert item == expected
示例#17
0
 def test_clean_set_id(self):
     """An id assigned with set_id() should still be present after clean(prune=True)."""
     item = CSL_Item(type='chapter')
     item.set_id('abc')
     item.clean(prune=True)
     expected = {'type': 'chapter', 'id': 'abc'}
     assert item == expected
示例#18
0
 def test_constuctor_empty(self):
     """A CSL_Item built without arguments should equal an empty dict."""
     item = CSL_Item()
     assert item == {}
示例#19
0
 def test_constuctor_by_dict_keyword_combination(self):
     """A positional dict and keyword arguments should be merged by the constructor."""
     item = CSL_Item({'title': 'My journal article'}, type='journal-article')
     expected = {
         'title': 'My journal article',
         'type': 'journal-article',
     }
     assert item == expected
示例#20
0
 def test_clean_set_id(self):
     """An id assigned with set_id() should still be present after clean(prune=True)."""
     item = CSL_Item(type="chapter")
     item.set_id("abc")
     item.clean(prune=True)
     expected = {"type": "chapter", "id": "abc"}
     assert item == expected
示例#21
0
 def test_correct_invalid_type(self):
     """The type journal-article should be corrected to article-journal."""
     item = CSL_Item(type='journal-article')
     corrected = item.correct_invalid_type()
     assert corrected == {'type': 'article-journal'}
示例#22
0
 def test_set_default_type(self):
     """set_default_type() on an empty CSL_Item should yield the type entry."""
     item = CSL_Item()
     defaulted = item.set_default_type()
     assert defaulted == {'type': 'entry'}
示例#23
0
 def test_set_default_type(self):
     """set_default_type() on an empty CSL_Item should yield the type entry."""
     item = CSL_Item()
     defaulted = item.set_default_type()
     assert defaulted == {"type": "entry"}
示例#24
0
 def test_constuctor_by_keyword(self):
     """Keyword arguments alone should populate the CSL_Item."""
     item = CSL_Item(type="journal-article")
     expected = {"type": "journal-article"}
     assert item == expected
示例#25
0
 def test_no_change_of_type(self):
     """Neither type-fixing method should alter an item whose type is book."""
     expected = {"type": "book"}
     after_correction = CSL_Item(type="book").correct_invalid_type()
     assert after_correction == expected
     after_default = CSL_Item(type="book").set_default_type()
     assert after_default == expected
示例#26
0
 def test_no_change_of_type(self):
     """Neither type-fixing method should alter an item whose type is book."""
     expected = {'type': 'book'}
     after_correction = CSL_Item(type='book').correct_invalid_type()
     assert after_correction == expected
     after_default = CSL_Item(type='book').set_default_type()
     assert after_default == expected
示例#27
0
def test_assert_csl_item_type_passes():
    """assert_csl_item_type should accept a CSL_Item instance without raising."""
    item = CSL_Item()
    assert_csl_item_type(item)
示例#28
0
def test_csl_item_standardize_id_repeated():
    """
    Calling standardize_id() twice should give equal results, and the first
    call should not leave a "standard_citation" key on the CSL Item.
    """
    csl_item = CSL_Item(id="pmid:1", type="article-journal")
    # Deep copies keep the two snapshots independent of later changes to csl_item.
    csl_item_1 = copy.deepcopy(csl_item.standardize_id())
    # Check membership on the CSL_Item itself. Testing against the string
    # literal "csl_item" is a substring check that can never fail.
    assert "standard_citation" not in csl_item
    csl_item_2 = copy.deepcopy(csl_item.standardize_id())
    assert csl_item_1 == csl_item_2
示例#29
0
def test_csl_item_standardize_id(csl_item, standard_citation):
    """
    standardize_id() should return the very object it was called on, with
    its id set to ``standard_citation``.
    """
    item = CSL_Item(csl_item)
    result = item.standardize_id()
    assert result is item
    standardized_id = result["id"]
    assert standardized_id == standard_citation
示例#30
0
 def test_constuctor_by_dict(self):
     """A CSL_Item built from a dict should compare equal to that dict."""
     source = {"title": "My book"}
     item = CSL_Item(source)
     assert item == source