def set_ref_from_entry(key, bib_data, ref_doc):
    """Populate ``ref_doc`` from the BibTeX entry ``key`` of ``bib_data``.

    The entry is serialised with ``bib_out.Writer`` and stored under
    ``ref_doc['bibtex']``.  Every field is then copied into ``ref_doc``
    under its stripped, lower-cased name after ``bibtex_purify``, except:

    * ``year`` is stored as an ``int``;
    * ``url`` is stored unpurified under ``ref_doc['url']``;
    * ``doi`` is stored unpurified under ``ref_doc['url']`` and is prefixed
      with ``http://dx.doi.org/`` unless it is already an http(s) URL.

    Finally ``ref_doc['authorsAsText']`` and
    ``ref_doc['firstAuthorSortKey']`` are derived from the entry's
    ``author`` persons; both are empty strings when there are no authors.

    Args:
        key: Key of the entry inside ``bib_data.entries``.
        bib_data: Parsed bibliography exposing ``entries`` (presumably a
            pybtex ``BibliographyData`` -- confirm against the caller).
        ref_doc: Mutable mapping that receives the extracted values.

    Returns:
        None.  ``ref_doc`` is modified in place.

    Note:
        Any exception raised while reading fields or authors (for example
        a non-numeric ``year``) is printed and swallowed, so ``ref_doc``
        may be left only partially filled.
    """
    entry = bib_data.entries[key]

    stream = StringIO.StringIO()
    writer = bib_out.Writer(encoding='ascii')
    writer.write_entry(key, entry, stream)
    ref_doc['bibtex'] = stream.getvalue()

    try:
        for field in entry.fields:
            rawvalue = entry.fields[field]
            fieldname = field.strip().lower()
            value = bibtex_purify(rawvalue)
            if fieldname == 'year':
                ref_doc['year'] = int(value)
            elif fieldname == 'doi':
                # Accept https:// as well, otherwise a DOI that is already
                # a secure URL would get the resolver prefix glued in front.
                if not rawvalue.startswith(('http://', 'https://')):
                    rawvalue = 'http://dx.doi.org/' + rawvalue
                ref_doc['url'] = rawvalue
            elif fieldname == 'url':
                ref_doc['url'] = rawvalue
            else:
                ref_doc[fieldname] = value

        # Entries that only list editors have persons but no 'author' role;
        # treat them like entries without any persons.
        persons = entry.persons
        authors = persons['author'] if persons and 'author' in persons else []

        names = []
        for person in authors:
            first = person.get_part_as_text('first')
            last = person.get_part_as_text('last')
            names.append(
                bibtex_purify(first + ' ' + last).replace('emph', ''))

        if len(names) > 1:
            # The separator before the last author has always been
            # ",  and " (two spaces); kept so the output is unchanged.
            authorsAsText = ', '.join(names[:-1]) + ',  and ' + names[-1]
        else:
            authorsAsText = ''.join(names)

        firstAuthorSortKey = ""
        if authors:
            firstAuthor = authors[0]
            firstAuthorSortKey = (firstAuthor.get_part_as_text('last') +
                                  firstAuthor.get_part_as_text('first'))

        ref_doc['authorsAsText'] = authorsAsText
        ref_doc['firstAuthorSortKey'] = firstAuthorSortKey
    except Exception as e:
        # Best effort: report the problem and keep whatever was extracted.
        print("**** BIBTEX PARSING SCREWED UP ****")
        print(e)
def set_ref_from_entry(key, bib_data, ref_doc):
    """Fill ``ref_doc`` with data taken from one BibTeX entry.

    ``ref_doc['bibtex']`` receives the entry as written by
    ``bib_out.Writer``.  Each field of the entry is then stored under its
    stripped, lower-cased name after passing through ``bibtex_purify``,
    with three special cases:

    * ``year`` is converted with ``int``;
    * ``url`` keeps its raw (unpurified) value;
    * ``doi`` is written to ``ref_doc['url']`` with its raw value, prefixed
      by ``http://dx.doi.org/`` unless it already starts with ``http://``
      or ``https://``.

    ``ref_doc['authorsAsText']`` and ``ref_doc['firstAuthorSortKey']`` are
    set from the ``author`` persons of the entry and default to empty
    strings when the entry has no authors.

    Args:
        key: Entry key to look up in ``bib_data.entries``.
        bib_data: Bibliography object providing ``entries`` (presumably a
            pybtex ``BibliographyData`` -- confirm against the caller).
        ref_doc: Mapping updated in place with the extracted values.

    Returns:
        None.

    Note:
        Failures while processing fields or authors (e.g. a ``year`` that
        ``int`` cannot parse) are printed and suppressed; values stored
        before the failure remain in ``ref_doc``.
    """
    doi_prefix = 'http://dx.doi.org/'
    entry = bib_data.entries[key]

    buf = StringIO.StringIO()
    bib_out.Writer(encoding='ascii').write_entry(key, entry, buf)
    ref_doc['bibtex'] = buf.getvalue()

    try:
        fields = entry.fields
        for field in fields:
            raw = fields[field]
            purified = bibtex_purify(raw)
            name = field.strip().lower()

            if name == 'year':
                ref_doc['year'] = int(purified)
            elif name == 'url':
                ref_doc['url'] = raw
            elif name == 'doi':
                # A DOI given as a full http(s) URL is used verbatim; the
                # https case used to be prefixed a second time.
                if raw.startswith(('http://', 'https://')):
                    ref_doc['url'] = raw
                else:
                    ref_doc['url'] = doi_prefix + raw
            else:
                ref_doc[name] = purified

        # No persons at all, or persons without an 'author' role (such as
        # editor-only entries), both mean "no authors".
        authors = []
        if entry.persons and 'author' in entry.persons:
            authors = entry.persons['author']

        simple_names = [
            bibtex_purify(
                person.get_part_as_text('first') + ' ' +
                person.get_part_as_text('last')
            ).replace('emph', '')
            for person in authors
        ]

        if len(simple_names) <= 1:
            authors_text = ''.join(simple_names)
        else:
            # Historical separator before the final author is ",  and "
            # (with two spaces); preserved so the output does not change.
            leading = ', '.join(simple_names[:-1])
            authors_text = leading + ',  and ' + simple_names[-1]

        sort_key = ""
        if authors:
            lead = authors[0]
            sort_key = (lead.get_part_as_text('last') +
                        lead.get_part_as_text('first'))

        ref_doc['authorsAsText'] = authors_text
        ref_doc['firstAuthorSortKey'] = sort_key
    except Exception as e:
        # Deliberately best effort: report and carry on.
        print("**** BIBTEX PARSING SCREWED UP ****")
        print(e)
def format_name(person):
    """Render ``person`` as ``"<prelast> <last>, <F>."``.

    The first, middle, prelast and last name attributes of ``person`` are
    each passed through ``bibtex_purify``.

    * When a last name is present, the non-empty prelast and last parts are
      joined with a single space and followed by ``", "``.
    * When a first or middle name is present, the first character of the
      first name followed by ``"."`` is appended.  The middle name itself is
      never printed, so a person with only a middle name contributes a bare
      ``"."``.

    Returns:
        The formatted string; empty when no name part is set.
    """
    given = bibtex_purify(person.first_name)
    middle = bibtex_purify(person.middle_name)
    von = bibtex_purify(person.prelast_name)
    family = bibtex_purify(person.last_name)

    pieces = []
    if family:
        # Skip an empty prelast part so no leading space is produced.
        surname = ' '.join(part for part in (von, family) if part)
        pieces.append(surname)
        pieces.append(", ")
    if given or middle:
        pieces.append(given[:1] + ".")
    return ''.join(pieces)
def purify(i):
    """Replace the top value of ``i`` with its purified form.

    Pops one value from ``i``, passes it through ``utils.bibtex_purify``
    and pushes the result back.

    Args:
        i: Object exposing ``pop()`` and ``push(value)``.
    """
    popped = i.pop()
    purified = utils.bibtex_purify(popped)
    i.push(purified)
def bibtex_import(filename, taxonomyItem):
    """Import every entry of a BibTeX file and attach it to ``taxonomyItem``.

    For each entry parsed from ``filename``:

    1. The entry is serialised with ``bib_out.Writer`` and a reference
       object is obtained from ``get_ref(key, title, bibtex_text)`` and
       saved.
    2. Each field is inspected by substring match on its lower-cased name
       (``title``, ``journal``, ``year``, ``url``, ``abstract``, ``doi``)
       to set the matching attribute on the reference.  A ``doi`` value is
       turned into a URL by prefixing ``http://dx.doi.org/`` unless it is
       already an http(s) URL.  Every field is additionally stored as a
       ``ReferenceAttribute`` linked to the reference.
    3. Each ``author`` person is looked up or created as a
       ``ReferenceAuthor`` and linked to the reference, and a
       human-readable author list is stored in ``authorsAsText``.
    4. The reference is saved and added to ``taxonomyItem.references`` if
       it is not already there.

    Args:
        filename: Path of the BibTeX file handed to ``bib_in.Parser``.
        taxonomyItem: Object exposing a ``references`` relation
            (presumably a Django model instance -- confirm).

    Returns:
        None.

    Note:
        A failure while processing one entry is logged and followed by
        ``connection._rollback()``; the remaining entries are still
        processed.  ``getTitle`` and ``get_ref`` run outside that guard.
    """
    doiPrepender = 'http://dx.doi.org/'

    parser = bib_in.Parser()
    bib_data = parser.parse_file(filename)
    writer = bib_out.Writer(encoding='ascii')

    for key in bib_data.entries.keys():
        entry = bib_data.entries[key]
        stream = StringIO.StringIO()
        writer.write_entry(key, entry, stream)
        title = getTitle(entry.fields)
        ref_obj = get_ref(key, title, stream.getvalue())
        try:
            ref_obj.save()

            for field in entry.fields:
                value = entry.fields[field]
                col = get_column(field)
                lowered = field.lower()
                if 'title' in lowered:
                    ref_obj.title = title
                elif 'journal' in lowered:
                    ref_obj.journal = bibtex_purify(value)
                elif 'year' in lowered:
                    ref_obj.year = int(value)
                elif 'url' in lowered:
                    ref_obj.url = value
                elif 'abstract' in lowered:
                    ref_obj.abstract = value
                elif 'doi' in lowered:
                    # Leave any http(s) URL untouched; only bare DOIs get
                    # the resolver prefix.
                    if not value.startswith(('http://', 'https://')):
                        value = doiPrepender + value
                    ref_obj.url = value
                # Every field is also kept as a generic attribute (for a
                # DOI this is the URL form computed above).
                attr = ReferenceAttribute(column=col, value=value)
                attr.save()
                ref_obj.referenceAttributes.add(attr)

            # Editor-only entries have persons but no 'author' role; treat
            # them like entries without persons instead of failing.
            persons = entry.persons
            authors = (persons['author']
                       if persons and 'author' in persons else [])

            names = []
            for person in authors:
                first = person.get_part_as_text('first')
                middle = person.get_part_as_text('middle')
                prelast = person.get_part_as_text('prelast')
                last = person.get_part_as_text('last')
                lineage = person.get_part_as_text('lineage')

                names.append(
                    bibtex_purify(first + ' ' + last).replace('emph', ''))

                try:
                    author, created = ReferenceAuthor.objects.get_or_create(
                        first_name=first, last_name=last)
                except Exception as e:
                    # Typically several rows match; fall back to the first.
                    logger.debug(
                        'Author %s %s exists in multiple places. Error is %s',
                        first, last, e)
                    author = ReferenceAuthor.objects.filter(
                        first_name=first).filter(last_name=last)[0]
                    created = False

                if created:
                    author.middle_name = middle
                    author.prelast_name = prelast
                    author.lineage = lineage
                    author.save()
                if author not in ref_obj.authors.all():
                    ref_obj.authors.add(author)

            if len(names) > 1:
                # The separator before the last author has always been
                # ",  and " (two spaces); kept so stored text is unchanged.
                authorsAsText = ', '.join(names[:-1]) + ',  and ' + names[-1]
            else:
                authorsAsText = ''.join(names)

            ref_obj.authorsAsText = authorsAsText
            ref_obj.save()
            if ref_obj not in taxonomyItem.references.all():
                taxonomyItem.references.add(ref_obj)
        except Exception:
            # Logger.log() needs an explicit level; calling it with only a
            # message raised inside this handler and skipped the rollback.
            logger.exception('Failed to import BibTeX entry %s', key)
            connection._rollback()
def getTitle(fields):
    """Return the purified title found in ``fields``, or ``None``.

    ``'title'`` is tried first, then ``'Title'``.  The first alias that is
    present with a non-empty value is passed through ``bibtex_purify`` and
    returned.

    Args:
        fields: Mapping of BibTeX field names to values.

    Returns:
        The purified title, or ``None`` when neither alias holds a
        non-empty value.
    """
    for alias in ('title', 'Title'):
        # Subscript (rather than .get) so mappings with a custom
        # __getitem__/__missing__ keep their lookup behaviour; a missing
        # alias must not stop us from trying the next one.
        try:
            candidate = fields[alias]
        except KeyError:
            continue
        if candidate:
            return bibtex_purify(candidate)
    return None