def test_multiple_string_write(self):
    """Two @string definitions are dumped in insertion order."""
    db = BibDatabase()
    # Order is important!
    db.strings['name1'] = 'value1'
    db.strings['name2'] = 'value2'
    expected = (
        '@string{name1 = {value1}}\n\n'
        '@string{name2 = {value2}}\n\n'
    )
    self.assertEqual(bibdeskparser.dumps(db), expected)
def test_write_common_strings(self):
    """With write_common_strings=True the month abbreviations etc. are
    written out and must match the reference data file."""
    db = BibDatabase()
    db.load_common_strings()
    writer = BibTexWriter(write_common_strings=True)
    with io.open('tests/data/common_strings.bib') as f:
        expected = f.read()
    result = bibdeskparser.dumps(db, writer=writer)
    self.assertEqual(result, expected)
def test_write_dependent_strings(self):
    """A string built from another string is dumped as a # expression."""
    db = BibDatabase()
    db.strings['title'] = 'Mr'
    db.strings['name'] = BibDataStringExpression(
        [BibDataString(db, 'title'), 'Smith']
    )
    expected = (
        '@string{title = {Mr}}\n\n'
        '@string{name = title # {Smith}}\n\n'
    )
    self.assertEqual(bibdeskparser.dumps(db), expected)
def test_align(self):
    """align_values pads every field name to the widest key.

    NOTE(review): the expected strings below are reconstructed with the
    alignment padding implied by ``align_values = True`` — confirm the
    exact padding against the writer's output.
    """
    # First: a single entry with one very long key.
    db = BibDatabase()
    db.entries = [{
        'ID': 'abc123',
        'ENTRYTYPE': 'book',
        'author': 'test',
        'thisisaverylongkey': 'longvalue',
    }]
    writer = BibTexWriter()
    writer.align_values = True
    result = bibdeskparser.dumps(db, writer)
    expected = """@book{abc123,
 author             = {test},
 thisisaverylongkey = {longvalue}
}
"""
    self.assertEqual(result, expected)

    # Second: alignment across several entries loaded from a data file.
    with open(
            'tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        db = bibdeskparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.align_values = True
    result = bibdeskparser.dumps(db, writer)
    expected = """@book{Toto3000,
 author    = {Toto, A and Titi, B},
 title     = {A title}
}

@article{Wigner1938,
 author    = {Wigner, E.},
 doi       = {10.1039/TF9383400029},
 issn      = {0014-7672},
 journal   = {Trans. Faraday Soc.},
 owner     = {fr},
 pages     = {29--41},
 publisher = {The Royal Society of Chemistry},
 title     = {The transition state method},
 volume    = {34},
 year      = {1938}
}

@book{Yablon2005,
 author    = {Yablon, A.D.},
 publisher = {Springer},
 title     = {Optical fiber fusion slicing},
 year      = {2005}
}
"""
    self.assertEqual(result, expected)
def test_entry_separator(self):
    """An empty entry_separator removes the blank line after an entry."""
    db = BibDatabase()
    db.entries = [
        {'ID': 'abc123', 'ENTRYTYPE': 'book', 'author': 'test'}
    ]
    writer = BibTexWriter()
    writer.entry_separator = ''
    result = bibdeskparser.dumps(db, writer)
    self.assertEqual(result, '@book{abc123,\n author = {test}\n}\n')
def test_sort_missing_field(self):
    """Entries lacking the sort field sort before those that have it."""
    db = BibDatabase()
    db.entries = [
        {'ID': 'b', 'ENTRYTYPE': 'article', 'year': '2000'},
        {'ID': 'c', 'ENTRYTYPE': 'book', 'year': '2010'},
        {'ID': 'a', 'ENTRYTYPE': 'book'},
    ]
    writer = BibTexWriter()
    writer.order_entries_by = ('year', )
    result = bibdeskparser.dumps(db, writer)
    self.assertEqual(
        result,
        "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n"
        "@book{c,\n year = {2010}\n}\n\n",
    )
def _entries_to_bibtex(self, bib_database):
    """Serialize all entries of *bib_database* to a BibTeX string.

    Honors ``self.order_entries_by`` for sorting and, when
    ``self.align_values`` is set, records the widest field name in
    ``self._max_field_width`` for use by ``_entry_to_bibtex``.

    :param bib_database: database whose ``entries`` list is dumped
    :returns: concatenated BibTeX source for every entry
    :rtype: str
    """
    if self.order_entries_by:
        # TODO: allow sort field does not exist for entry
        entries = sorted(
            bib_database.entries,
            key=lambda x: BibDatabase.entry_sort_key(
                x, self.order_entries_by),
        )
    else:
        entries = bib_database.entries

    if self.align_values:
        # Widest field name over *all* entries.  The ``default=0`` guards
        # against the ValueError max() would raise when the database has
        # no entries (or an entry has no fields) — the original crashed
        # on an empty database with align_values enabled.
        self._max_field_width = max(
            (len(key) for entry in entries for key in entry),
            default=0,
        )

    # join() avoids quadratic string concatenation on large databases.
    return ''.join(self._entry_to_bibtex(entry) for entry in entries)
def test_single_string_write(self):
    """A lone @string definition round-trips through dumps()."""
    db = BibDatabase()
    db.strings['name1'] = 'value1'
    self.assertEqual(
        bibdeskparser.dumps(db), '@string{name1 = {value1}}\n\n'
    )
def setUp(self):
    # Fresh, empty database for every test.
    self.bd = BibDatabase()
def setUp(self):
    # Database holding one known string ('name' -> 'value') and a
    # BibDataString referring to it, shared by the tests below.
    self.bd = BibDatabase()
    self.bd.strings['name'] = 'value'
    self.bds = BibDataString(self.bd, 'name')
def test_single_preamble_write(self):
    """A single preamble is dumped as a quoted @preamble block."""
    db = BibDatabase()
    db.preambles = [' a ']
    self.assertEqual(bibdeskparser.dumps(db), '@preamble{" a "}\n\n')
class TestEntrySorting(unittest.TestCase):
    """Tests for the ``order_entries_by`` option of the BibTeX writer."""

    # Shared fixture: deliberately unsorted entries (IDs b, c, a).
    bib_database = BibDatabase()
    bib_database.entries = [
        {
            'ID': 'b',
            'ENTRYTYPE': 'article'
        },
        {
            'ID': 'c',
            'ENTRYTYPE': 'book'
        },
        {
            'ID': 'a',
            'ENTRYTYPE': 'book'
        },
    ]

    def test_sort_default(self):
        # A default writer sorts entries by ID (a, b, c).
        result = bibdeskparser.dumps(self.bib_database)
        expected = "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_none(self):
        # order_entries_by=None preserves the original insertion order.
        writer = BibTexWriter()
        writer.order_entries_by = None
        result = bibdeskparser.dumps(self.bib_database, writer)
        expected = "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_id(self):
        # Explicit sort by ID matches the default behavior.
        writer = BibTexWriter()
        writer.order_entries_by = ('ID', )
        result = bibdeskparser.dumps(self.bib_database, writer)
        expected = "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_type(self):
        # Sorting on ENTRYTYPE only: ties (the two books) keep their
        # original relative order.
        writer = BibTexWriter()
        writer.order_entries_by = ('ENTRYTYPE', )
        result = bibdeskparser.dumps(self.bib_database, writer)
        expected = "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_type_id(self):
        # Secondary key ID breaks the ENTRYTYPE tie between the books.
        writer = BibTexWriter()
        writer.order_entries_by = ('ENTRYTYPE', 'ID')
        result = bibdeskparser.dumps(self.bib_database, writer)
        expected = "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n"
        self.assertEqual(result, expected)

    def test_sort_missing_field(self):
        # An entry without the sort field ('a' has no year) sorts first.
        bib_database = BibDatabase()
        bib_database.entries = [
            {
                'ID': 'b',
                'ENTRYTYPE': 'article',
                'year': '2000'
            },
            {
                'ID': 'c',
                'ENTRYTYPE': 'book',
                'year': '2010'
            },
            {
                'ID': 'a',
                'ENTRYTYPE': 'book'
            },
        ]
        writer = BibTexWriter()
        writer.order_entries_by = ('year', )
        result = bibdeskparser.dumps(bib_database, writer)
        expected = "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n@book{c,\n year = {2010}\n}\n\n"
        self.assertEqual(result, expected)

    def test_unicode_problems(self):
        # See #51: dumping must not choke on non-ASCII characters (©).
        # NOTE(review): line breaks inside this fixture string were lost
        # in transit and have been reconstructed one-field-per-line.
        bibtex = """
@article{Mesa-Gresa2013,
abstract = {During a 4-week period half the mice (n = 16) were exposed to EE and the other half (n = 16) remained in a standard environment (SE). Aggr. Behav. 9999:XX-XX, 2013. © 2013 Wiley Periodicals, Inc.},
author = {Mesa-Gresa, Patricia and P\'{e}rez-Martinez, Asunci\'{o}n and Redolat, Rosa},
doi = {10.1002/ab.21481},
file = {:Users/jscholz/Documents/mendeley/Mesa-Gresa, P\'{e}rez-Martinez, Redolat - 2013 - Environmental Enrichment Improves Novel Object Recognition and Enhances Agonistic Behavior.pdf:pdf},
issn = {1098-2337},
journal = {Aggressive behavior},
month = "apr",
number = {April},
pages = {269--279},
pmid = {23588702},
title = {{Environmental Enrichment Improves Novel Object Recognition and Enhances Agonistic Behavior in Male Mice.}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/23588702},
volume = {39},
year = {2013}
}
"""
        bibdb = bibdeskparser.loads(bibtex)
        with tempfile.TemporaryFile(mode='w+') as bibtex_file:
            # Writing to a real file object exercises the encoding path.
            bibdeskparser.dump(bibdb, bibtex_file)
def test_entries_dict_prop(self):
    """The ``entries_dict`` property mirrors ``get_entry_dict()``."""
    db = BibDatabase()
    db.entries = self.entries
    self.assertEqual(db.entries_dict, db.get_entry_dict())
def test_ignore_common_strings(self):
    """Common strings alone are not written out at all."""
    db = BibDatabase()
    db.load_common_strings()
    self.assertEqual(bibdeskparser.dumps(db), '')
def __init__(
    self,
    data=None,
    customization=None,
    ignore_nonstandard_types=True,
    homogenize_fields=False,
    interpolate_strings=True,
    common_strings=False,
    add_missing_from_crossref=False,
):
    """
    Creates a parser for reading BibTeX files

    :return: parser
    :rtype: `BibTexParser`
    """
    # Parsed results accumulate into this database object.
    self.bib_database = BibDatabase()
    #: Load common strings such as months abbreviation
    #: Default: `False`.
    self.common_strings = common_strings
    if self.common_strings:
        self.bib_database.load_common_strings()
    #: Callback function to process BibTeX entries after parsing,
    #: for example to create a list from a string with multiple values.
    #: By default all BibTeX values are treated as simple strings.
    #: Default: `None`.
    self.customization = customization
    #: Ignore non-standard BibTeX types (`book`, `article`, etc).
    #: Default: `True`.
    self.ignore_nonstandard_types = ignore_nonstandard_types
    #: Sanitize BibTeX field names, for example change `url` to `link` etc.
    #: Field names are always converted to lowercase names.
    #: Default: `False`.
    self.homogenize_fields = homogenize_fields
    #: Interpolate Bibtex Strings or keep the structure
    self.interpolate_strings = interpolate_strings
    # On some sample data files, the character encoding detection simply
    # hangs. We are going to default to utf8, and mandate it.
    self.encoding = 'utf8'
    # Add missing field from cross-ref
    self.add_missing_from_crossref = add_missing_from_crossref
    # pre-defined set of key changes applied when homogenize_fields is on
    self.alt_dict = {
        'keyw': u'keyword',
        'keywords': u'keyword',
        'authors': u'author',
        'editors': u'editor',
        'urls': u'url',
        'link': u'url',
        'links': u'url',
        'subjects': u'subject',
        'xref': u'crossref',
    }
    # Setup the parser expression
    self._init_expressions()
class BibTexParser:
    """ A parser for reading BibTeX bibliographic data files.

    Example::

        from bibdeskparser.bparser import BibTexParser

        bibtex_str = ...

        parser = BibTexParser()
        parser.ignore_nonstandard_types = False
        parser.homogenize_fields = False
        parser.common_strings = False
        bib_database = bibdeskparser.loads(bibtex_str, parser)

    :param customization: function or None (default)
        Customization to apply to parsed entries.
    :param ignore_nonstandard_types: bool (default True)
        If True ignores non-standard bibtex entry types.
    :param homogenize_fields: bool (default False)
        Common field name replacements (as set in alt_dict attribute).
    :param interpolate_strings: bool (default True)
        If True, replace bibtex string by their value, else uses
        BibDataString objects.
    :param common_strings: bool (default False)
        Include common string definitions (e.g. month abbreviations) to
        the bibtex file.
    :param add_missing_from_crossref: bool (default False)
        Resolve BibTeX references set in the crossref field for BibTeX
        entries and add the fields from the referenced entry to the
        referencing entry.
    """

    def __new__(cls, data=None, **args):
        """
        To catch the old API structure in which creating the parser would
        immediately parse and return data.
        """
        if data is None:
            return super(BibTexParser, cls).__new__(cls)
        else:
            # For backwards compatibility: if data is given, parse
            # and return the `BibDatabase` object instead of the parser.
            return parse(data, **args)

    def __init__(
        self,
        data=None,
        customization=None,
        ignore_nonstandard_types=True,
        homogenize_fields=False,
        interpolate_strings=True,
        common_strings=False,
        add_missing_from_crossref=False,
    ):
        """
        Creates a parser for reading BibTeX files

        :return: parser
        :rtype: `BibTexParser`
        """
        # Parsed results accumulate into this database object.
        self.bib_database = BibDatabase()
        #: Load common strings such as months abbreviation
        #: Default: `False`.
        self.common_strings = common_strings
        if self.common_strings:
            self.bib_database.load_common_strings()
        #: Callback function to process BibTeX entries after parsing,
        #: for example to create a list from a string with multiple values.
        #: By default all BibTeX values are treated as simple strings.
        #: Default: `None`.
        self.customization = customization
        #: Ignore non-standard BibTeX types (`book`, `article`, etc).
        #: Default: `True`.
        self.ignore_nonstandard_types = ignore_nonstandard_types
        #: Sanitize BibTeX field names, for example change `url` to `link` etc.
        #: Field names are always converted to lowercase names.
        #: Default: `False`.
        self.homogenize_fields = homogenize_fields
        #: Interpolate Bibtex Strings or keep the structure
        self.interpolate_strings = interpolate_strings
        # On some sample data files, the character encoding detection simply
        # hangs. We are going to default to utf8, and mandate it.
        self.encoding = 'utf8'
        # Add missing field from cross-ref
        self.add_missing_from_crossref = add_missing_from_crossref
        # pre-defined set of key changes applied when homogenize_fields is on
        self.alt_dict = {
            'keyw': u'keyword',
            'keywords': u'keyword',
            'authors': u'author',
            'editors': u'editor',
            'urls': u'url',
            'link': u'url',
            'links': u'url',
            'subjects': u'subject',
            'xref': u'crossref',
        }
        # Setup the parser expression
        self._init_expressions()

    def parse(self, bibtex_str, partial=False):
        """Parse a BibTeX string into an object

        :param bibtex_str: BibTeX string
        :type: str
        :param partial: If True, print errors only on parsing failures.
            If False, an exception is raised.
        :type: bool
        :return: bibliographic database
        :rtype: BibDatabase
        """
        bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
        try:
            # Side effect: the parse actions wired in _init_expressions
            # fill self.bib_database as the expression matches.
            self._expr.parseFile(bibtex_file_obj)
        except self._expr.ParseException as exc:
            logger.error("Could not parse properly, starting at %s",
                         exc.line)
            if not partial:
                raise exc
        if self.add_missing_from_crossref:
            self.bib_database.add_missing_from_crossref()
        return self.bib_database

    def parse_file(self, file, partial=False):
        """Parse a BibTeX file into an object

        :param file: BibTeX file or file-like object
        :type: typing.IO
        :param partial: If True, print errors only on parsing failures.
            If False, an exception is raised.
        :type: bool
        :return: bibliographic database
        :rtype: BibDatabase
        """
        return self.parse(file.read(), partial=partial)

    def _init_expressions(self):
        """
        Defines all parser expressions used internally.
        """
        self._expr = BibtexExpression()
        # Handle string as BibDataString object
        self._expr.set_string_name_parse_action(
            lambda s, l, t: BibDataString(self.bib_database, t[0]))
        # When interpolate_strings is set, expressions collapse to plain
        # text immediately; otherwise the expression object is kept.
        if self.interpolate_strings:
            maybe_interpolate = lambda expr: as_text(expr)
        else:
            maybe_interpolate = lambda expr: expr
        self._expr.set_string_expression_parse_action(
            lambda s, l, t: maybe_interpolate(
                BibDataStringExpression.expression_if_needed(t)))
        # Add notice to logger
        self._expr.add_log_function(logger.debug)
        # Set actions: each grammar rule feeds the matching _add_* method.
        self._expr.entry.addParseAction(lambda s, l, t: self._add_entry(
            t.get('EntryType'), t.get('Key'), t.get('Fields')))
        self._expr.implicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0]))
        self._expr.explicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0]))
        self._expr.preamble_decl.addParseAction(
            lambda s, l, t: self._add_preamble(t[0]))
        self._expr.string_def.addParseAction(lambda s, l, t: self._add_string(
            t['StringName'].name, t['StringValue']))

    def _bibtex_file_obj(self, bibtex_str):
        """Wrap *bibtex_str* (str or bytes) in a BOM-free StringIO."""
        # Some files have Byte-order marks inserted at the start
        byte = b'\xef\xbb\xbf'
        if isinstance(bibtex_str, str):
            # Decode the BOM bytes to their single-character str form.
            byte = str(byte, self.encoding, 'ignore')
            # NOTE(review): an empty input string would raise IndexError
            # on the subscript below — confirm callers never pass ''.
            if bibtex_str[0] == byte:
                bibtex_str = bibtex_str[1:]
        else:
            if bibtex_str[:3] == byte:
                bibtex_str = bibtex_str[3:]
            bibtex_str = bibtex_str.decode(encoding=self.encoding)
        return io.StringIO(bibtex_str)

    def _clean_val(self, val):
        """ Clean instring before adding to dictionary

        :param val: a value
        :type val: string
        :returns: string -- value
        """
        # Empty braces are treated the same as a missing value.
        if not val or val == "{}":
            return ''
        return val

    def _clean_key(self, key):
        """ Lowercase a key and return as str.

        :param key: a key
        :type key: str
        :returns: (str) string-value
        """
        key = key.lower()
        # NOTE(review): after .lower() the key is already str on
        # Python 3; the bytes fallback below looks like a Python 2
        # leftover — confirm before removing.
        if not isinstance(key, str):
            return str(key, 'utf-8')
        else:
            return key

    def _clean_field_key(self, key):
        """ Clean a bibtex field key and homogenize alternative forms.

        :param key: a key
        :type key: str
        :returns: string-value
        """
        key = self._clean_key(key)
        if self.homogenize_fields:
            if key in list(self.alt_dict.keys()):
                key = self.alt_dict[key]
        return key

    def _add_entry(self, entry_type, entry_id, fields):
        """ Adds a parsed entry.
        Includes checking type and fields, cleaning, applying customizations.

        :param entry_type: the entry type
        :type entry_type: str
        :param entry_id: the entry bibid
        :type entry_id: str
        :param fields: the fields and values
        :type fields: dictionary
        :returns: string-value
        """
        d = {}
        entry_type = self._clean_key(entry_type)
        if self.ignore_nonstandard_types and entry_type not in STANDARD_TYPES:
            # Entry is silently dropped (only a warning is logged).
            logger.warning('Entry type %s not standard. Not considered.',
                           entry_type)
            return
        for key in fields:
            d[self._clean_field_key(key)] = self._clean_val(fields[key])
        d['ENTRYTYPE'] = entry_type
        d['ID'] = entry_id
        crossref = d.get('crossref', None)
        if self.add_missing_from_crossref and crossref is not None:
            # Stash the raw crossref for later resolution.
            d['_crossref'] = crossref
        if self.customization is not None:
            # apply customization
            logger.debug('Apply customizations and return dict')
            d = self.customization(d)
        self.bib_database.entries.append(d)

    def _add_comment(self, comment):
        """ Stores a comment in the list of comment.

        :param comment: the parsed comment
        :type comment: str
        """
        logger.debug('Store comment in list of comments: ' +
                     comment.__repr__())
        self.bib_database.comments.append(comment)

    def _add_string(self, string_key, string):
        """ Stores a new string in the string dictionary.

        :param string_key: the string key
        :type string_key: str
        :param string: the string value
        :type string: str
        """
        if string_key in self.bib_database.strings:
            # Later definitions win; earlier value is replaced.
            logger.warning('Overwritting existing string for key: %s.',
                           string_key)
        logger.debug(u'Store string: {} -> {}'.format(string_key, string))
        self.bib_database.strings[string_key] = self._clean_val(string)

    def _add_preamble(self, preamble):
        """ Stores a preamble.

        :param preamble: the parsed preamble
        :type preamble: str
        """
        logger.debug('Store preamble in list of preambles')
        self.bib_database.preambles.append(preamble)
def test_multiple_preamble_write(self):
    """Several preambles are each dumped as their own @preamble block.

    Renamed from ``test_multiple_string_write``: the old name described
    strings although the body exercises preambles, and it collided with
    the identically named string test — a duplicate method name in the
    same TestCase silently shadows one of the two tests.
    """
    bib_database = BibDatabase()
    bib_database.preambles = [' a ', 'b']
    result = bibdeskparser.dumps(bib_database)
    expected = '@preamble{" a "}\n\n@preamble{"b"}\n\n'
    self.assertEqual(result, expected)
def test_ignore_common_strings_only_if_not_overloaded(self):
    """Only a common string whose value was overridden is written out."""
    db = BibDatabase()
    db.load_common_strings()
    db.strings['jan'] = 'Janvier'
    self.assertEqual(
        bibdeskparser.dumps(db), '@string{jan = {Janvier}}\n\n'
    )
def test_entries_list_method(self):
    """``entries`` and ``get_entry_list()`` expose the same list."""
    db = BibDatabase()
    db.entries = self.entries
    self.assertEqual(db.entries, db.get_entry_list())