def test_citekey_check_input_id_type():
    """CiteKey must raise TypeError when input_id is not a str."""
    # None: also pin the exact error message.
    with pytest.raises(TypeError) as excinfo:
        CiteKey(None)
    message = str(excinfo.value)
    assert message == "input_id should be type 'str' not 'NoneType': None"

    # int: only the exception type is checked.
    with pytest.raises(TypeError):
        CiteKey(0)
def standardize_id(self):
    """
    Set this csl_item's "id" field to its standard citation key, in-place.

    The id is first inferred via ``self.infer_id()`` (documented as drawing
    on a "standard_citation" field, the "note" field, or the "id" field).
    The inferred id is then passed through ``CiteKey`` to obtain the final
    ``standard_id``.

    The note is updated with any of the following key-value pairs that are
    not already recorded with the same value:

    - ``original_id``: the "id" present before inference, when it is
      non-empty and differs from the standard_id.
    - ``original_standard_id``: the id produced by inference, when it is
      non-empty and differs from the standard_id.
    - ``standard_id``: the final standardized id.

    Manubot generally calls the "id" of a CSL Item a citekey; "id" is used
    here for consistency with CSL's "id" field.

    Returns self, after modification.
    """
    input_id = self.get("id")
    self.infer_id()
    inferred_id = self["id"]
    standard_id = CiteKey(inferred_id).standard_id

    existing_note = self.note_dict
    note_updates = {}
    # Record the pre-standardization ids only when they carry information
    # (non-empty, different from standard_id) and are not yet in the note.
    candidates = (
        ("original_id", input_id),
        ("original_standard_id", inferred_id),
    )
    for note_key, value in candidates:
        if not value or value == standard_id:
            continue
        if value != existing_note.get(note_key):
            note_updates[note_key] = value
    if standard_id != existing_note.get("standard_id"):
        note_updates["standard_id"] = standard_id

    self.note_append_dict(dictionary=note_updates)
    self.set_id(standard_id)
    return self
def test_inspect_citekey_fails(citekey, contains):
    """
    Citekeys that should fail CiteKey.inspect(): the returned report must be
    a string containing the expected fragment.
    """
    problem = CiteKey(citekey).inspect()
    assert problem is not None
    assert isinstance(problem, str)
    assert contains in problem
def process_record(record):
    """
    Expand a catalog record with retrieved metadata.

    Recognized keys are popped from ``record`` (the input dict is modified
    in-place); whatever remains is returned under ``'extras'``.

    The output always has a ``'manubot'`` entry, plus ``'preprint'`` and/or
    ``'journal'`` entries when the record supplies a ``*_citation`` whose
    citekey has a handled prefix and passes inspection. Each of these
    entries is then filled with fields derived from its CSL item.
    """
    html_url = record.pop('html_url')
    manubot = {
        'repo_url': record.pop('repo_url'),
        'url': html_url,
        'citation': f"url:{html_url}",
    }
    output = {'manubot': manubot}

    # An explicit thumbnail_url key (even if empty) suppresses the HTML lookup.
    try:
        thumbnail_url = record.pop('thumbnail_url')
    except KeyError:
        thumbnail_url = get_thumbnail_url_from_html(html_url)
    if thumbnail_url:
        manubot['thumbnail_url'] = thumbnail_url

    for publication_type in ('preprint', 'journal'):
        citation = record.pop(f'{publication_type}_citation', None)
        if not citation:
            continue
        citekey = CiteKey(citation)
        if not citekey.is_handled_prefix:
            logging.warning(f"unhandled citekey: {citation!r}")
            continue
        report = citekey.inspect()
        if report:
            logging.warning(
                f"citekey failed inspection: {citation!r}\n{report}")
            continue
        output[publication_type] = {'citation': citekey.standard_id}

    # Field name -> helper that derives it from the CSL item, in fill order.
    derived_fields = (
        ('title', get_title),
        ('authors', get_authors_text),
        ('journal', get_journal),
        ('date_iso', get_date),
        ('date_human', get_date_summary),
    )
    for entry in output.values():
        csl_item = citekey_to_csl_item(entry['citation'])
        # Keep an existing url (the manubot entry); otherwise take CSL's URL.
        if 'url' not in entry and 'URL' in csl_item:
            entry['url'] = csl_item['URL']
        for field, derive in derived_fields:
            entry[field] = derive(csl_item)
        entry['csl_item'] = csl_item

    output['extras'] = record
    return output
def __post_init__(self):
    """
    Build self.citekeys: one CiteKey per distinct entry of self.input_ids,
    in order of first occurrence, each constructed with self.aliases.
    """
    # dict keys are unique and keep insertion order, so this deduplicates.
    unique_ids = dict.fromkeys(self.input_ids)
    self.citekeys = [
        CiteKey(input_id, self.aliases) for input_id in unique_ids
    ]
def test_citekey_check_input_id_at_prefix():
    """CiteKey must raise ValueError for an input_id starting with '@'."""
    with pytest.raises(ValueError) as excinfo:
        CiteKey("@my-citekey")
    message = str(excinfo.value)
    assert "input_id: '@my-citekey'\nstarts with '@'" in message
def test_citekey_class(input_id, citekey_attrs):
    """
    Each attribute named in citekey_attrs must equal its expected value on
    CiteKey(input_id), and short_id must be truthy.
    """
    citekey = CiteKey(input_id)
    print(citekey)
    for attr_name, expected in citekey_attrs.items():
        actual = getattr(citekey, attr_name)
        assert actual == expected
    assert citekey.short_id
def test_inspect_citekey_passes(citekey):
    """
    Citekeys that should pass CiteKey.inspect(), i.e. produce no report.
    """
    assert CiteKey(citekey).inspect() is None
def test_citekey_standard_id(input_id, expected):
    """
    CiteKey.standard_id must match the expected value for common prefixes.
    """
    assert CiteKey(input_id).standard_id == expected