Exemplo n.º 1
0
    def standardize_id(self):
        """
        Replace this CSL Item's "id" with its standardized citekey, in place.

        The value of "id" before any processing is remembered as the original_id.
        ``self.infer_id()`` is then called and ``self["id"]`` is read back as the
        citekey to standardize (the "original_standard_id"); how ``infer_id``
        picks that value is defined outside this method. That citekey must pass
        ``is_valid_citekey(..., allow_raw=True)`` and is then run through
        ``standardize_citekey`` to produce the final standard_id.

        Provenance is recorded through ``self.note_append_dict``. Only entries
        that differ from what ``self.note_dict`` already holds are appended:

        - original_id: when it is truthy and differs from the standard_id.
        - original_standard_id: when it is truthy and differs from the standard_id.
        - standard_id: whenever the note does not already hold this value.

        Note that the Manubot software generally refers to the "id" of a CSL Item
        as a citekey. Here "id" is used for consistency with CSL's "id" field.

        Returns self, after calling ``self.set_id`` with the standard_id.
        Raises AssertionError when the inferred id is not a valid citekey.
        """
        id_before_inference = self.get("id")
        self.infer_id()
        inferred_id = self["id"]
        assert is_valid_citekey(inferred_id, allow_raw=True)
        standard_id = standardize_citekey(inferred_id, warn_if_changed=False)

        existing_note = self.note_dict
        note_updates = {}
        # Earlier identifiers are only worth recording when standardization
        # changed them and the note does not already carry the same value.
        earlier_ids = (
            ("original_id", id_before_inference),
            ("original_standard_id", inferred_id),
        )
        for note_key, earlier_id in earlier_ids:
            if (
                earlier_id
                and earlier_id != standard_id
                and earlier_id != existing_note.get(note_key)
            ):
                note_updates[note_key] = earlier_id
        if standard_id != existing_note.get("standard_id"):
            note_updates["standard_id"] = standard_id

        self.note_append_dict(dictionary=note_updates)
        self.set_id(standard_id)
        return self
Exemplo n.º 2
0
def standardize_citation(*args, **kwargs):
    """
    Deprecated alias for ``standardize_citekey``.

    Emits a FutureWarning and then forwards all positional and keyword
    arguments unchanged to ``standardize_citekey``, returning its result.
    """
    import warnings
    warnings.warn(
        "'standardize_citation' has been renamed to 'standardize_citekey'"
        " and will be removed in a future release.",
        category=FutureWarning,
        # Attribute the warning to the code calling the deprecated name,
        # not to this wrapper, so users can locate what needs updating.
        stacklevel=2,
    )
    return standardize_citekey(*args, **kwargs)
Exemplo n.º 3
0
def csl_item_set_standard_id(csl_item):
    """
    Set the "id" of a CSL Item dictionary to its standardized citekey, in place.

    The citekey to standardize (the "original_standard_id") is taken from the
    first available of these sources, listed from highest to lowest priority:

    1. the "standard_citation" field, which is removed from csl_item when used;
    2. the "standard_id" entry parsed from the "note" field;
    3. the "id" field after passing it through infer_citekey_prefix.

    The chosen citekey must pass ``is_valid_citekey(..., allow_raw=True)`` and is
    then run through ``standardize_citekey`` to produce the final standard_id.

    The note field is appended to (via append_to_csl_item_note) with whichever of
    original_id, original_standard_id and standard_id are not already recorded
    there; the two "original" values are only recorded when they are truthy and
    differ from the standard_id.

    Note that the Manubot software generally refers to the "id" of a CSL Item as
    a citekey. Here "id" is used for consistency with CSL's "id" field.

    Returns the same csl_item object that was passed in.
    Raises ValueError when csl_item is not a dict or when none of the sources
    above is present, and AssertionError when the chosen citekey is invalid.
    """
    if not isinstance(csl_item, dict):
        raise ValueError(
            "csl_item must be a CSL Data Item represented as a Python dictionary")

    from manubot.cite.citeproc import (
        append_to_csl_item_note,
        parse_csl_item_note,
    )
    existing_note = parse_csl_item_note(csl_item.get('note', ''))

    # Resolve the candidate citekey; each later source overrides the earlier one.
    has_id = 'id' in csl_item
    supplied_id = csl_item['id'] if has_id else None
    candidate = infer_citekey_prefix(supplied_id) if has_id else None
    if 'standard_id' in existing_note:
        candidate = existing_note['standard_id']
    if 'standard_citation' in csl_item:
        candidate = csl_item.pop('standard_citation')
    if candidate is None:
        raise ValueError(
            'csl_item_set_standard_id could not detect a field with a citation / standard_citation. '
            'Consider setting the CSL Item "id" field.')

    assert is_valid_citekey(candidate, allow_raw=True)
    standard_id = standardize_citekey(candidate, warn_if_changed=False)

    note_updates = {}
    # Earlier identifiers are only worth recording when standardization
    # changed them and the note does not already carry the same value.
    earlier_ids = (
        ('original_id', supplied_id),
        ('original_standard_id', candidate),
    )
    for note_key, earlier_id in earlier_ids:
        if (
            earlier_id
            and earlier_id != standard_id
            and earlier_id != existing_note.get(note_key)
        ):
            note_updates[note_key] = earlier_id
    if standard_id != existing_note.get('standard_id'):
        note_updates['standard_id'] = standard_id

    append_to_csl_item_note(csl_item, dictionary=note_updates)
    csl_item['id'] = standard_id
    return csl_item
Exemplo n.º 4
0
def cli_cite(args):
    """
    Main function for the manubot cite command-line interface.

    Reads these attributes from args: citekeys, prune_csl, render, output,
    format and csl. Each citekey that passes is_valid_citekey is standardized
    and converted to a CSL Item; citekeys that fail validation are skipped, and
    any exception raised while processing a citekey is logged rather than
    propagated, so the remaining citekeys are still processed.

    When args.render is falsy, the CSL Items are written as JSON to args.output
    (opened for writing as UTF-8) or to standard output when args.output is
    falsy. Standard output is left open so the process can keep writing to it.

    Otherwise the references are rendered by call_pandoc. If args.format is
    unset, it is looked up from the suffix of args.output in
    extension_to_format, and falls back to 'plain'; args.format is updated
    in place with the chosen value.

    Does not allow user to directly specify Pandoc's --to argument, due to
    inconsistent citation rendering by output format. See
    https://github.com/jgm/pandoc/issues/4834
    """
    # generate CSL JSON data
    csl_list = []
    for citekey in args.citekeys:
        try:
            if not is_valid_citekey(citekey):
                continue
            citekey = standardize_citekey(citekey)
            csl_item = citekey_to_csl_item(citekey, prune=args.prune_csl)
            csl_list.append(csl_item)
        except Exception as error:
            # Best effort: report the failure and continue with other citekeys.
            logging.error(f'citekey_to_csl_item for {citekey!r} failed '
                          f'due to a {error.__class__.__name__}:\n{error}')
            logging.info(error, exc_info=True)

    # output CSL JSON data, if --render is False
    if not args.render:
        def write_csl_json(handle):
            json.dump(csl_list, handle, ensure_ascii=False, indent=2)
            handle.write('\n')

        if args.output:
            with args.output.open('w', encoding='utf-8') as write_file:
                write_csl_json(write_file)
        else:
            # Do not wrap sys.stdout in a `with` block: leaving the block
            # would close the process-wide stream.
            write_csl_json(sys.stdout)
        return

    # use Pandoc to render references
    if not args.format and args.output:
        args.format = extension_to_format.get(args.output.suffix)
    if not args.format:
        args.format = 'plain'
    pandoc_metadata = {
        'nocite': '@*',
        'csl': args.csl,
        'references': csl_list,
    }
    call_pandoc(
        metadata=pandoc_metadata,
        path=args.output,
        format=args.format,
    )
Exemplo n.º 5
0
def process_record(record):
    """
    Expand a catalog record with retrieved metadata.

    record is consumed in place: 'html_url', 'repo_url', 'thumbnail_url',
    'preprint_citation' and 'journal_citation' are popped from it, and whatever
    remains is returned under the 'extras' key. 'html_url' and 'repo_url' are
    required (KeyError otherwise).

    The returned dictionary always has a 'manubot' entry, cited as
    "url:<html_url>", plus a 'preprint' and/or 'journal' entry for each
    corresponding citation that is present, non-empty and passes
    is_valid_citekey. When the record supplies no 'thumbnail_url', one is
    looked up with get_thumbnail_url_from_html; it is stored only if truthy.

    Every entry is then filled in from the CSL Item for its standardized
    citation: title, authors, journal, date_iso, date_human, the csl_item
    itself, and a url taken from the CSL Item's 'URL' when the entry has none.
    """
    page_url = record.pop('html_url')
    manubot_entry = {
        'repo_url': record.pop('repo_url'),
        'url': page_url,
        'citation': f"url:{page_url}",
    }
    thumbnail = (
        record.pop('thumbnail_url')
        if 'thumbnail_url' in record
        else get_thumbnail_url_from_html(page_url)
    )
    if thumbnail:
        manubot_entry['thumbnail_url'] = thumbnail

    output = {'manubot': manubot_entry}
    for publication_type in ('preprint', 'journal'):
        citation = record.pop(f'{publication_type}_citation', None)
        if citation and is_valid_citekey(citation):
            output[publication_type] = {'citation': citation}

    # Enrich every entry collected so far with metadata from its CSL Item.
    for entry in output.values():
        csl_item = citekey_to_csl_item(standardize_citekey(entry['citation']))
        if 'url' not in entry and 'URL' in csl_item:
            entry['url'] = csl_item['URL']
        entry.update({
            'title': get_title(csl_item),
            'authors': get_authors_text(csl_item),
            'journal': get_journal(csl_item),
            'date_iso': get_date(csl_item),
            'date_human': get_date_summary(csl_item),
            'csl_item': csl_item,
        })

    output['extras'] = record
    return output