def _get_crossreferenced_citations(self, citations, min_crossrefs):
    r"""
    Get citations not cited explicitly but referenced by other citations.

    Each key in ``citations`` is looked up in ``self.entries``.  Keys that
    are missing, or whose entry has no ``crossref`` field, are skipped.
    A ``crossref`` that points at a missing entry is handed to
    ``report_error`` as a ``BibliographyDataError`` and skipped too.
    Otherwise the reference is counted against the ``key`` of the target
    entry, and that key is yielded as soon as its count reaches
    ``min_crossrefs``, unless it is already among ``citations`` or has
    been yielded before.

    NOTE(review): ``citations`` is iterated twice (once to build the set,
    once by the loop), so a one-shot iterator would produce nothing.

    >>> from pybtex.database import Entry
    >>> data = BibliographyData({
    ...     'main_article': Entry('article', {'crossref': 'xrefd_arcicle'}),
    ...     'xrefd_arcicle': Entry('article'),
    ... })
    >>> list(data._get_crossreferenced_citations([], min_crossrefs=1))
    []
    >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
    [u'xrefd_arcicle']
    >>> list(data._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
    [u'xrefd_arcicle']
    >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
    []
    >>> list(data._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
    []

    >>> data2 = BibliographyData(data.entries, wanted_entries=data.entries.keys())
    >>> list(data2._get_crossreferenced_citations([], min_crossrefs=1))
    []
    >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
    [u'xrefd_arcicle']
    >>> list(data2._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
    [u'xrefd_arcicle']
    >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
    []
    >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
    []
    >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
    []

    """
    reference_totals = CaseInsensitiveDefaultDict(int)
    already_listed = CaseInsensitiveSet(citations)

    for cited_key in citations:
        # A citation without an entry, or an entry without a crossref,
        # contributes nothing.
        try:
            target_key = self.entries[cited_key].fields['crossref']
        except KeyError:
            continue

        try:
            target_entry = self.entries[target_key]
        except KeyError:
            dangling = BibliographyDataError(
                'bad cross-reference: entry "{key}" refers to '
                'entry "{crossref}" which does not exist.'.format(
                    key=cited_key, crossref=target_key,
                )
            )
            report_error(dangling)
            continue

        # Count against the key stored on the target entry, not the
        # spelling used in the crossref field.
        canonical = target_entry.key
        reference_totals[canonical] += 1
        threshold_reached = reference_totals[canonical] >= min_crossrefs
        if threshold_reached and canonical not in already_listed:
            already_listed.add(canonical)
            yield canonical
def handle_bibdata(self, bibdata):
    r"""Record the database list given by a ``\bibdata`` command.

    ``bibdata`` is a comma-separated string; its pieces are stored as a
    list in ``self.data``.  If ``self.data`` is already set, the existing
    value is kept and an ``AuxDataError`` is passed to ``report_error``.
    """
    if self.data is None:
        self.data = bibdata.split(',')
        return
    duplicate = AuxDataError(r'illegal, another \bibdata command', self.context)
    report_error(duplicate)
def _get_crossreferenced_citations(self, citations, min_crossrefs):
    """
    Get citations not cited explicitly but referenced by other citations.

    Each key in ``citations`` is looked up in ``self.entries``.  Keys that
    are missing, or whose entry has no ``crossref`` field, are skipped.
    A ``crossref`` that points at a missing entry is handed to
    ``report_error`` as a ``BibliographyDataError`` and skipped too.
    Otherwise the reference is counted against the ``key`` of the target
    entry, and that key is yielded as soon as its count reaches
    ``min_crossrefs``, unless it is already among ``citations`` or has
    been yielded before.

    NOTE(review): ``citations`` is iterated twice (once to build the set,
    once by the loop), so a one-shot iterator would produce nothing.

    >>> from pybtex.database import Entry
    >>> data = BibliographyData({
    ...     'main_article': Entry('article', {'crossref': 'xrefd_arcicle'}),
    ...     'xrefd_arcicle': Entry('article'),
    ... })
    >>> list(data._get_crossreferenced_citations([], min_crossrefs=1))
    []
    >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
    ['xrefd_arcicle']
    >>> list(data._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
    ['xrefd_arcicle']
    >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
    []
    >>> list(data._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
    []

    >>> data2 = BibliographyData(data.entries, wanted_entries=data.entries.keys())
    >>> list(data2._get_crossreferenced_citations([], min_crossrefs=1))
    []
    >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=1))
    ['xrefd_arcicle']
    >>> list(data2._get_crossreferenced_citations(['Main_article'], min_crossrefs=1))
    ['xrefd_arcicle']
    >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=2))
    []
    >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
    []
    >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1))
    []

    """
    reference_totals = CaseInsensitiveDefaultDict(int)
    already_listed = CaseInsensitiveSet(citations)

    for cited_key in citations:
        # A citation without an entry, or an entry without a crossref,
        # contributes nothing.
        try:
            target_key = self.entries[cited_key].fields['crossref']
        except KeyError:
            continue

        try:
            target_entry = self.entries[target_key]
        except KeyError:
            dangling = BibliographyDataError(
                'bad cross-reference: entry "{key}" refers to '
                'entry "{crossref}" which does not exist.'.format(
                    key=cited_key, crossref=target_key,
                )
            )
            report_error(dangling)
            continue

        # Count against the key stored on the target entry, not the
        # spelling used in the crossref field.
        canonical = target_entry.key
        reference_totals[canonical] += 1
        threshold_reached = reference_totals[canonical] >= min_crossrefs
        if threshold_reached and canonical not in already_listed:
            already_listed.add(canonical)
            yield canonical
def handle_bibstyle(self, style):
    r"""Record the style named by a ``\bibstyle`` command.

    ``style`` is stored unchanged in ``self.style``.  If ``self.style`` is
    already set, the existing value is kept and an ``AuxDataError`` is
    passed to ``report_error``.
    """
    if self.style is None:
        self.style = style
        return
    duplicate = AuxDataError(r'illegal, another \bibstyle command', self.context)
    report_error(duplicate)
def add_entry(self, key, entry):
    """Store ``entry`` in ``self.entries`` under ``key``.

    A ``key`` that is not already an ``EntryKey`` is converted with
    ``EntryKey.from_string``.  If the resulting key is already present,
    a ``BibliographyDataError`` is passed to ``report_error`` and the
    existing entry is left untouched.  Otherwise the entry's
    ``collection`` and ``key`` attributes are set before it is stored.
    """
    entry_key = key if isinstance(key, EntryKey) else EntryKey.from_string(key)

    if entry_key in self.entries:
        duplicate = BibliographyDataError('repeated bibliograhpy entry: %s' % entry_key)
        report_error(duplicate)
        return

    entry.collection = self
    entry.key = entry_key
    self.entries[entry_key] = entry
def handle_citation(self, keys):
    """Record every cite key from a comma-separated ``keys`` string.

    Each key is appended to ``self.citations`` and remembered in
    ``self._canonical_keys`` under its lower-cased form.  When a key's
    lower-cased form is already known with a different spelling, an
    ``AuxDataError`` is passed to ``report_error``; the key is still
    recorded afterwards and replaces the remembered spelling.
    """
    known = self._canonical_keys
    for cite_key in keys.split(','):
        folded = cite_key.lower()
        if folded in known:
            previous = known[folded]
            if cite_key != previous:
                template = 'case mismatch error between cite keys {0} and {1}'
                report_error(
                    AuxDataError(template.format(cite_key, previous), self.context)
                )
        self.citations.append(cite_key)
        known[folded] = cite_key
def add_entry(self, key, entry):
    """Store ``entry`` under the canonical form of ``key``.

    Nothing happens when ``self.want_entry(key)`` is false.  If ``key``
    is already in ``self.entries``, a ``BibliographyDataError`` is passed
    to ``report_error`` and the entry is not stored.  Otherwise the entry
    gets ``self.get_canonical_key(key)`` as its ``key`` and is stored
    under it.  If the entry has a ``crossref`` field and
    ``self.wanted_entries`` is not ``None``, the referenced key is added
    to ``self.wanted_entries``.
    """
    if not self.want_entry(key):
        return
    if key in self.entries:
        duplicate = BibliographyDataError('repeated bibliograhpy entry: %s' % key)
        report_error(duplicate)
        return

    entry.key = self.get_canonical_key(key)
    self.entries[entry.key] = entry

    try:
        referenced = entry.fields['crossref']
    except KeyError:
        return
    if self.wanted_entries is not None:
        self.wanted_entries.add(referenced)
def add_entry(self, key, entry):
    """Store ``entry`` under the canonical form of ``key``.

    Nothing happens when ``self.want_entry(key)`` is false.  If ``key``
    is already in ``self.entries``, a ``BibliographyDataError`` is passed
    to ``report_error`` and the entry is not stored.  Otherwise the entry
    gets this object as its ``collection`` and
    ``self.get_canonical_key(key)`` as its ``key``, and is stored under
    that key.  If the entry has a ``crossref`` field and
    ``self.wanted_entries`` is not ``None``, the referenced key is added
    to ``self.wanted_entries``.
    """
    if not self.want_entry(key):
        return
    if key in self.entries:
        duplicate = BibliographyDataError('repeated bibliograhpy entry: %s' % key)
        report_error(duplicate)
        return

    entry.collection = self
    entry.key = self.get_canonical_key(key)
    self.entries[entry.key] = entry

    try:
        referenced = entry.fields['crossref']
    except KeyError:
        return
    if self.wanted_entries is not None:
        self.wanted_entries.add(referenced)
def run_bibtex(style, database, citations=None):
    """Run the external ``bibtex`` program and return the ``.bbl`` text.

    A temporary directory is created, ``test.bib``, ``test.aux`` and
    ``test.bst`` are written into it from ``database``, ``citations`` and
    ``style``, and ``bibtex test`` is executed there.  When ``citations``
    is ``None`` every key of ``database.entries`` is cited.  A non-zero
    exit status is reported through ``report_error`` with the captured
    standard output.  The temporary directory is removed whether or not
    the run succeeds.
    """
    if citations is None:
        citations = database.entries.keys()
    workdir = mkdtemp(prefix='pybtex_test_')

    def scratch(filename):
        return path.join(workdir, filename)

    try:
        write_bib(scratch('test.bib'), database)
        write_aux(scratch('test.aux'), citations)
        write_bst(scratch('test.bst'), style)

        process = Popen(('bibtex', 'test'), cwd=workdir, stdout=PIPE, stderr=PIPE)
        # stderr is captured only so that it does not leak to the console.
        output = process.communicate()[0]
        if process.returncode:
            report_error(PybtexError(output))

        with open(scratch('test.bbl')) as bbl_file:
            return bbl_file.read()
    finally:
        rmtree(workdir)
def add_entry(self, key, entry):
    """Store ``entry`` under ``key`` and track its cross-reference.

    Nothing happens when ``self.want_entry(key)`` is false.  If ``key``
    is already in ``self.entries``, a ``BibliographyDataError`` is passed
    to ``report_error`` and the entry is not stored.  Otherwise the entry
    gets this object as its ``collection`` and ``key`` as its ``key``,
    and is stored in ``self.entries``.

    If the entry has a ``crossref`` field, the counter for the referenced
    key in ``self.crossref_count`` is incremented.  Once that counter
    reaches ``self.min_crossrefs`` the referenced key is added to
    ``self.wanted_entries``, provided ``self.wanted_entries`` is not
    ``None``.
    """
    if not self.want_entry(key):
        return
    if key in self.entries:
        report_error(BibliographyDataError('repeated bibliograhpy entry: %s' % key))
        return

    entry.collection = self
    entry.key = key  # previously assigned twice in a row
    self.entries[key] = entry

    try:
        crossref = entry.fields['crossref']
    except KeyError:
        return

    self.crossref_count[crossref] += 1
    if (self.crossref_count[crossref] >= self.min_crossrefs
            and self.wanted_entries is not None):
        self.wanted_entries.add(crossref)
def add_entry(self, key, entry):
    """Store ``entry`` under ``key`` and track its cross-reference.

    Nothing happens when ``self.want_entry(key)`` is false.  If ``key``
    is already in ``self.entries``, a ``BibliographyDataError`` is passed
    to ``report_error`` and the entry is not stored.  Otherwise the entry
    gets this object as its ``collection`` and ``key`` as its ``key``,
    and is stored in ``self.entries``.

    If the entry has a ``crossref`` field, the counter for the referenced
    key in ``self.crossref_count`` is incremented.  Once that counter
    reaches ``self.min_crossrefs`` the referenced key is added to
    ``self.wanted_entries``, provided ``self.wanted_entries`` is not
    ``None``.
    """
    if not self.want_entry(key):
        return
    if key in self.entries:
        report_error(
            BibliographyDataError('repeated bibliograhpy entry: %s' % key))
        return

    entry.collection = self
    entry.key = key  # previously assigned twice in a row
    self.entries[key] = entry

    try:
        crossref = entry.fields['crossref']
    except KeyError:
        return

    self.crossref_count[crossref] += 1
    if (self.crossref_count[crossref] >= self.min_crossrefs
            and self.wanted_entries is not None):
        self.wanted_entries.add(crossref)
def handle_error(self, error):
    """Pass ``error`` on to ``pybtex.errors.report_error``.

    The import happens at call time rather than at module load.
    """
    from pybtex.errors import report_error as forward
    forward(error)
def handle_bibstyle(self, style):
    r"""Remember the style given by a ``\bibstyle`` command.

    The first call stores ``style`` in ``self.style``.  Any call made
    while ``self.style`` is already set leaves it alone and passes an
    ``AuxDataError`` to ``report_error`` instead.
    """
    already_set = self.style is not None
    if not already_set:
        self.style = style
        return
    report_error(
        AuxDataError(r'illegal, another \bibstyle command', self.context)
    )
def handle_bibdata(self, bibdata):
    r"""Remember the databases given by a ``\bibdata`` command.

    The first call splits ``bibdata`` on commas and stores the resulting
    list in ``self.data``.  Any call made while ``self.data`` is already
    set leaves it alone and passes an ``AuxDataError`` to
    ``report_error`` instead.
    """
    already_set = self.data is not None
    if not already_set:
        self.data = bibdata.split(',')
        return
    report_error(
        AuxDataError(r'illegal, another \bibdata command', self.context)
    )
def print_warning(msg):
    """Wrap ``msg`` in a ``BibTeXError`` and pass it to ``report_error``."""
    warning = BibTeXError(msg)
    report_error(warning)
def _parse_string(self, name):
    """Extract various parts of the name from a string.

    ``name`` is split on commas with ``split_tex_string`` and handled
    according to the number of parts:

    * three parts -- ``von Last, Jr, First``
    * two parts -- ``von Last, First``
    * one part -- ``First von Last``

    With more than three parts an ``InvalidNameString`` is passed to
    ``report_error`` and everything from the third part onwards is joined
    with spaces and treated as the "First" part.  The words found are
    appended to ``self.first_names``, ``self.middle_names``,
    ``self.prelast_names``, ``self.last_names`` and
    ``self.lineage_names``.

    Raises ``ValueError`` if the comma split produces no parts at all.

    >>> p = Person('Avinash K. Dixit')
    >>> print(p.first_names)
    [u'Avinash']
    >>> print(p.middle_names)
    [u'K.']
    >>> print(p.prelast_names)
    []
    >>> print(p.last_names)
    [u'Dixit']
    >>> print(p.lineage_names)
    []
    >>> print(six.text_type(p))
    Dixit, Avinash K.
    >>> p == Person(six.text_type(p))
    True
    >>> p = Person('Dixit, Jr, Avinash K. ')
    >>> print(p.first_names)
    [u'Avinash']
    >>> print(p.middle_names)
    [u'K.']
    >>> print(p.prelast_names)
    []
    >>> print(p.last_names)
    [u'Dixit']
    >>> print(p.lineage_names)
    [u'Jr']
    >>> print(six.text_type(p))
    Dixit, Jr, Avinash K.
    >>> p == Person(six.text_type(p))
    True

    >>> p = Person('abc')
    >>> print(p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names)
    [] [] [] [u'abc'] []
    >>> p = Person('Viktorov, Michail~Markovitch')
    >>> print(p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names)
    [u'Michail'] [u'Markovitch'] [] [u'Viktorov'] []
    """
    def process_first_middle(parts):
        # The first word is the first name, the rest are middle names.
        # An empty list leaves both untouched.
        try:
            self.first_names.append(parts[0])
            self.middle_names.extend(parts[1:])
        except IndexError:
            pass

    def process_von_last(parts):
        # von cannot be the last name in the list
        von_last = parts[:-1]
        definitely_not_von = parts[-1:]
        if von_last:
            von, last = rsplit_at(von_last, is_von_name)
            self.prelast_names.extend(von)
            self.last_names.extend(last)
        self.last_names.extend(definitely_not_von)

    def find_pos(lst, pred):
        # Index of the first item satisfying ``pred``, or the number of
        # items when none does.  ``lst`` may be a one-shot iterator
        # (rsplit_at passes ``reversed(...)``), so the length is counted
        # while iterating.  Starting the counter at 0 makes an empty
        # sequence return 0 instead of raising UnboundLocalError.
        count = 0
        for count, item in enumerate(lst, 1):
            if pred(item):
                return count - 1
        return count

    def split_at(lst, pred):
        """Split the given list into two parts.

        The second part starts with the first item for which the given
        predicate is True.
        """
        pos = find_pos(lst, pred)
        return lst[:pos], lst[pos:]

    def rsplit_at(lst, pred):
        # Like split_at, but the second part starts right after the last
        # item for which the predicate is True.
        rpos = find_pos(reversed(lst), pred)
        pos = len(lst) - rpos
        return lst[:pos], lst[pos:]

    def is_von_name(string):
        # A word counts as "von" when its first significant letter is
        # lower case.
        if string[0].isupper():
            return False
        if string[0].islower():
            return True
        else:
            # The first character is not a cased letter: scan for the first
            # brace-level-0 letter or brace-level-1 backslash sequence.
            for char, brace_level in scan_bibtex_string(string):
                if brace_level == 0 and char.isalpha():
                    return char.islower()
                elif brace_level == 1 and char.startswith('\\'):
                    return special_char_islower(char)
        return False

    def special_char_islower(special_char):
        # Skip the leading run of letters (the control sequence name), then
        # judge by the case of the first letter that follows.
        control_sequence = True
        for char in special_char[1:]:  # skip the backslash
            if control_sequence:
                if not char.isalpha():
                    control_sequence = False
            else:
                if char.isalpha():
                    return char.islower()
        return False

    parts = split_tex_string(name, ',')
    if len(parts) > 3:
        report_error(InvalidNameString(name))
        # Fold the surplus parts into the third one so parsing can go on.
        last_parts = parts[2:]
        parts = parts[:2] + [' '.join(last_parts)]

    if len(parts) == 3:  # von Last, Jr, First
        process_von_last(split_tex_string(parts[0]))
        self.lineage_names.extend(split_tex_string(parts[1]))
        process_first_middle(split_tex_string(parts[2]))
    elif len(parts) == 2:  # von Last, First
        process_von_last(split_tex_string(parts[0]))
        process_first_middle(split_tex_string(parts[1]))
    elif len(parts) == 1:  # First von Last
        parts = split_tex_string(name)
        first_middle, von_last = split_at(parts, is_von_name)
        if not von_last and first_middle:
            # No "von" word found: the final word is the last name.
            last = first_middle.pop()
            von_last.append(last)
        process_first_middle(first_middle)
        process_von_last(von_last)
    else:
        # should not really happen
        raise ValueError(name)
def _parse_string(self, name):
    """Extract various parts of the name from a string.

    ``name`` is split on commas with ``split_tex_string`` and handled
    according to the number of parts:

    * three parts -- ``von Last, Jr, First``
    * two parts -- ``von Last, First``
    * one part -- ``First von Last``

    With more than three parts an ``InvalidNameString`` is passed to
    ``report_error`` and everything from the third part onwards is joined
    with spaces and treated as the "First" part.  The words found are
    appended to ``self.first_names``, ``self.middle_names``,
    ``self.prelast_names``, ``self.last_names`` and
    ``self.lineage_names``.

    Raises ``ValueError`` if the comma split produces no parts at all.

    >>> p = Person('Avinash K. Dixit')
    >>> print p.first_names
    ['Avinash']
    >>> print p.middle_names
    ['K.']
    >>> print p.prelast_names
    []
    >>> print p.last_names
    ['Dixit']
    >>> print p.lineage_names
    []
    >>> print unicode(p)
    Dixit, Avinash K.
    >>> p == Person(unicode(p))
    True
    >>> p = Person('Dixit, Jr, Avinash K. ')
    >>> print p.first_names
    ['Avinash']
    >>> print p.middle_names
    ['K.']
    >>> print p.prelast_names
    []
    >>> print p.last_names
    ['Dixit']
    >>> print p.lineage_names
    ['Jr']
    >>> print unicode(p)
    Dixit, Jr, Avinash K.
    >>> p == Person(unicode(p))
    True

    >>> p = Person('abc')
    >>> print p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names
    [] [] [] ['abc'] []
    >>> p = Person('Viktorov, Michail~Markovitch')
    >>> print p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names
    ['Michail'] ['Markovitch'] [] ['Viktorov'] []
    """
    def process_first_middle(parts):
        # The first word is the first name, the rest are middle names.
        # An empty list leaves both untouched.
        try:
            self.first_names.append(parts[0])
            self.middle_names.extend(parts[1:])
        except IndexError:
            pass

    def process_von_last(parts):
        # von cannot be the last name in the list
        von_last = parts[:-1]
        definitely_not_von = parts[-1:]
        if von_last:
            von, last = rsplit_at(von_last, is_von_name)
            self.prelast_names.extend(von)
            self.last_names.extend(last)
        self.last_names.extend(definitely_not_von)

    def find_pos(lst, pred):
        # Index of the first item satisfying ``pred``, or the number of
        # items when none does.  ``lst`` may be a one-shot iterator
        # (rsplit_at passes ``reversed(...)``), so the length is counted
        # while iterating.  Starting the counter at 0 makes an empty
        # sequence return 0 instead of raising UnboundLocalError.
        count = 0
        for count, item in enumerate(lst, 1):
            if pred(item):
                return count - 1
        return count

    def split_at(lst, pred):
        """Split the given list into two parts.

        The second part starts with the first item for which the given
        predicate is True.
        """
        pos = find_pos(lst, pred)
        return lst[:pos], lst[pos:]

    def rsplit_at(lst, pred):
        # Like split_at, but the second part starts right after the last
        # item for which the predicate is True.
        rpos = find_pos(reversed(lst), pred)
        pos = len(lst) - rpos
        return lst[:pos], lst[pos:]

    def is_von_name(string):
        # A word counts as "von" when its first significant letter is
        # lower case.
        if string[0].isupper():
            return False
        if string[0].islower():
            return True
        else:
            # The first character is not a cased letter: scan for the first
            # brace-level-0 letter or brace-level-1 backslash sequence.
            for char, brace_level in scan_bibtex_string(string):
                if brace_level == 0 and char.isalpha():
                    return char.islower()
                elif brace_level == 1 and char.startswith('\\'):
                    return special_char_islower(char)
        return False

    def special_char_islower(special_char):
        # Skip the leading run of letters (the control sequence name), then
        # judge by the case of the first letter that follows.
        control_sequence = True
        for char in special_char[1:]:  # skip the backslash
            if control_sequence:
                if not char.isalpha():
                    control_sequence = False
            else:
                if char.isalpha():
                    return char.islower()
        return False

    parts = split_tex_string(name, ',')
    if len(parts) > 3:
        report_error(InvalidNameString(name))
        # Fold the surplus parts into the third one so parsing can go on.
        last_parts = parts[2:]
        parts = parts[:2] + [' '.join(last_parts)]

    if len(parts) == 3:  # von Last, Jr, First
        process_von_last(split_tex_string(parts[0]))
        self.lineage_names.extend(split_tex_string(parts[1]))
        process_first_middle(split_tex_string(parts[2]))
    elif len(parts) == 2:  # von Last, First
        process_von_last(split_tex_string(parts[0]))
        process_first_middle(split_tex_string(parts[1]))
    elif len(parts) == 1:  # First von Last
        parts = split_tex_string(name)
        first_middle, von_last = split_at(parts, is_von_name)
        if not von_last and first_middle:
            # No "von" word found: the final word is the last name.
            last = first_middle.pop()
            von_last.append(last)
        process_first_middle(first_middle)
        process_von_last(von_last)
    else:
        # should not really happen
        raise ValueError(name)