Exemplo n.º 1
0
def parse_raw_abstracts_and_authors(fname):
	"""
	Hack to grab the unadulterated 'abstract' and 'author' keys from bib files.

	The file is scanned line by line:

	* a line whose first character is '@' opens a citation; the text between
	  the first '{' and the following ',' becomes the current citation key,
	* a line whose first non-blank text is 'author' or 'abstract' starts a
	  brace-delimited value. The value may span several lines and its last
	  line must end with '},'.

	Reading an abstract clears the current citation key, so an 'author' line
	that follows the abstract of the same entry is reported as being outside
	of a citation.

	Only '{...}' delimited values are understood (the first character after
	the '=' is dropped unconditionally).

	Problems are reported through the module-level `logger`
	(critical_leader / critical).
	NOTE(review): the code assumes logger.critical aborts; confirm.

	fname -- path of the bib file to read.

	Returns a tuple (raw_abstracts, raw_authors); each is a dict mapping a
	citation key to the raw text of that field. Leading whitespace of every
	line is removed, line breaks inside the value are kept.
	"""

	def fail(*leaders):
		# Emit the explanatory lines, then the final critical message.
		for message in leaders:
			logger.critical_leader(message)
		logger.critical('Failed to parse ' + fname)

	def read_braced_value(rest, lines, end_rule, end_found):
		# Collect a '{...}' value. `rest` is the remainder of the first line
		# with the opening brace already removed; continuation lines are
		# pulled from `lines` until the braces balance.
		pieces = []
		depth = 1 # We have already stripped the leading '{'
		while True:
			depth += rest.count('{') - rest.count('}')
			rest = rest.lstrip()
			# '<=' rather than '==': a closing line with surplus '}' must be
			# reported here instead of silently swallowing the rest of the file
			if depth <= 0:
				rest = rest.rstrip()
				if rest[-2:] != '},':
					fail(end_rule, end_found, '\t>>>'+rest+'<<<')
				pieces.append(rest[:-2])
				break
			# A whitespace-only line is kept as a bare line break
			pieces.append(rest if rest else '\n')
			rest = next(lines, None)
			if rest is None:
				# Ran out of input before the braces balanced
				fail('Reached the end of the file inside a {...} block',
					'Is a closing "}," missing?')
				break
		return ''.join(pieces)

	raw_abstracts = {}
	raw_authors = {}

	proc = None
	# The context manager closes the file even when parsing stops early
	with open(fname) as bibfile:
		lines = iter(bibfile)
		for line in lines:
			if line[0] == '@':
				line = line.rstrip()
				if line[-1] != ',':
					fail('Citation declaration must be on its own line',
						'This line must end with a comma',
						'Offending line:',
						'\t>>>'+line+'<<<')
				proc = line.split('{')[1].split(',')[0]
			line = line.lstrip()
			if line[0:6] == 'author':
				noun, store = 'author', raw_authors
				end_rule = 'Author last line must end with "},"'
			elif line[0:8] == 'abstract':
				noun, store = 'abstract', raw_abstracts
				end_rule = 'Abstract block last line must end with "},"'
			else:
				continue

			# Split on the first '=' only: the value itself may contain '='
			_, found, value = line.partition('=')
			if not found:
				# Not a "key = value" line, e.g. wrapped text of another field
				# that merely starts with the keyword
				continue

			if proc is None:
				fail('Found an ' + noun + ' before a citation?',
					'Something has gone wrong. I found this line:',
					'\t>>>'+line.rstrip()+'<<<',
					'Outside of a citation?')

			store[proc] = read_braced_value(
				value.lstrip()[1:], lines, end_rule,
				'Instead, I think the ' + noun + ' ends with line:')

			if store is raw_abstracts:
				# The abstract is treated as the end of the current citation
				proc = None

	return raw_abstracts, raw_authors
Exemplo n.º 2
0
def parse_raw_abstracts_and_authors(fname):
	"""
	Hack to grab the unadulterated 'abstract' and 'author' keys from bib files.

	The file is scanned line by line:

	* a line whose first character is '@' opens a citation; the text between
	  the first '{' and the following ',' becomes the current citation key,
	* a line whose first non-blank text is 'author' or 'abstract' starts a
	  brace-delimited value. The value may span several lines and its last
	  line must end with '},'.

	Reading an abstract clears the current citation key, so an 'author' line
	that follows the abstract of the same entry is reported as being outside
	of a citation.

	Only '{...}' delimited values are understood (the first character after
	the '=' is dropped unconditionally).

	Problems are reported through the module-level `logger`
	(critical_leader / critical).
	NOTE(review): the code assumes logger.critical aborts; confirm.

	fname -- path of the bib file to read.

	Returns a tuple (raw_abstracts, raw_authors); each is a dict mapping a
	citation key to the raw text of that field. Leading whitespace of every
	line is removed, line breaks inside the value are kept.
	"""

	def fail(*leaders):
		# Emit the explanatory lines, then the final critical message.
		for message in leaders:
			logger.critical_leader(message)
		logger.critical('Failed to parse ' + fname)

	def read_braced_value(rest, lines, end_rule, end_found):
		# Collect a '{...}' value. `rest` is the remainder of the first line
		# with the opening brace already removed; continuation lines are
		# pulled from `lines` until the braces balance.
		pieces = []
		depth = 1 # We have already stripped the leading '{'
		while True:
			depth += rest.count('{') - rest.count('}')
			rest = rest.lstrip()
			# '<=' rather than '==': a closing line with surplus '}' must be
			# reported here instead of silently swallowing the rest of the file
			if depth <= 0:
				rest = rest.rstrip()
				if rest[-2:] != '},':
					fail(end_rule, end_found, '\t>>>'+rest+'<<<')
				pieces.append(rest[:-2])
				break
			# A whitespace-only line is kept as a bare line break
			pieces.append(rest if rest else '\n')
			rest = next(lines, None)
			if rest is None:
				# Ran out of input before the braces balanced
				fail('Reached the end of the file inside a {...} block',
					'Is a closing "}," missing?')
				break
		return ''.join(pieces)

	raw_abstracts = {}
	raw_authors = {}

	proc = None
	# The context manager closes the file even when parsing stops early
	with open(fname) as bibfile:
		lines = iter(bibfile)
		for line in lines:
			if line[0] == '@':
				line = line.rstrip()
				if line[-1] != ',':
					fail('Citation declaration must be on its own line',
						'This line must end with a comma',
						'Offending line:',
						'\t>>>'+line+'<<<')
				proc = line.split('{')[1].split(',')[0]
			line = line.lstrip()
			if line[0:6] == 'author':
				noun, store = 'author', raw_authors
				end_rule = 'Author last line must end with "},"'
			elif line[0:8] == 'abstract':
				noun, store = 'abstract', raw_abstracts
				end_rule = 'Abstract block last line must end with "},"'
			else:
				continue

			# Split on the first '=' only: the value itself may contain '='
			_, found, value = line.partition('=')
			if not found:
				# Not a "key = value" line, e.g. wrapped text of another field
				# that merely starts with the keyword
				continue

			if proc is None:
				fail('Found an ' + noun + ' before a citation?',
					'Something has gone wrong. I found this line:',
					'\t>>>'+line.rstrip()+'<<<',
					'Outside of a citation?')

			store[proc] = read_braced_value(
				value.lstrip()[1:], lines, end_rule,
				'Instead, I think the ' + noun + ' ends with line:')

			if store is raw_abstracts:
				# The abstract is treated as the end of the current citation
				proc = None

	return raw_abstracts, raw_authors
Exemplo n.º 3
0
	def __init__ (self, bibkey, entry, bibgroup, raw_authors, raw_abstracts):
		"""
		Build the publication record for one bib entry.

		Adds derived keys to entry.fields ('authors', 'badge', 'type',
		'display-type', the '*-html' variants and 'acceptance-percent'),
		records file/link locations in self.paths, computes a sortable
		self.date and fills self.hiddens with the initially hidden content.
		The (mutated) entry is kept as self.entry.
		"""
		self.bibkey = bibkey
		self.raw_abstract = raw_abstracts

		# Plain comma separated author list
		if 'author' in entry.persons:
			names = [get_name(person) for person in entry.persons['author']]
		else:
			names = []
		entry.fields['authors'] = ', '.join(names)

		# Groups starting with 'p' or 'd' share the 'PD' badge
		badge = bibgroup[0].upper()
		entry.fields['badge'] = 'PD' if badge in ('P', 'D') else badge
		entry.fields['type'] = bibgroup
		entry.fields['display-type'] = WORK_TYPES[bibgroup]

		self.paths = {}

		# The paper itself: copy it to the content directory when we have it
		pdf_name = bibkey + '.pdf'
		local_pdf = os.path.join('static', 'cv', pdf_name)
		if os.path.exists(local_pdf):
			cp(local_pdf, LOCAL_CONTENT_DIR)
			self.paths['pdf'] = os.path.join(CONTENT_DIR, pdf_name)
		elif 'to-appear' in entry.fields and entry.fields['to-appear'] == '1':
			logger.warn('No PDF for "To Appear" paper {}'.format(bibkey))
		else:
			logger.critical_leader('Unable to find {}'.format(pdf_name))
			logger.critical_leader('\tYou need to add a copy of your paper to the cv/ direcotry')
			logger.critical('\tYour paper should be named the same as the key to the bib entry')

		# The paper source: first archive format that exists wins
		self.missing_zip = True
		for suffix in ('.zip', '.tgz', '.tar.gz'):
			archive = os.path.join('cv', bibkey + suffix)
			if not os.path.exists(archive):
				continue
			cp(archive, LOCAL_CONTENT_DIR)
			self.paths['tex_source'] = os.path.join(CONTENT_DIR, bibkey + suffix)
			self.missing_zip = False
			break

		# Link to the talk: prefer one named after the conference series,
		# fall back to one named after the bibkey
		talks_dir = os.path.join('static', 'talks')
		if 'series' in entry.fields:
			series_short = entry.fields['series'].lower().replace(' ', '').replace("'", '')
			if os.path.exists(os.path.join(talks_dir, series_short+'.pdf')):
				self.paths['talk'] = '/talks.html#{}'.format(series_short)
		if 'talk' not in self.paths and os.path.exists(os.path.join(talks_dir, bibkey+'.pdf')):
			self.paths['talk'] = '/talks.html#{}'.format(bibkey)

		# Unwrap \url{...} from the link fields; only a missing conference
		# link is worth a warning
		for url_key in ('conference-url', 'video-url'):
			try:
				if entry.fields[url_key][0:5] == '\\url{':
					entry.fields[url_key] = entry.fields[url_key][5:-1]
			except KeyError:
				if url_key == 'conference-url':
					logger.warn("Unable to find conference URL for {}".format(bibkey))
					logger.warn('\tThis entry will be missing a link to the conference')

		# Construct the best date we can for this publication
		try:
			year = entry.fields['year']
		except KeyError:
			logger.critical_leader("Bib entry {} is missing publication year.".format(bibkey))
			logger.critical("\tPlease add a year entry and try again.")

		month = None
		for month_key in ('month', 'mon'):
			try:
				month = entry.fields[month_key]
				break
			except KeyError:
				pass
		else:
			# Neither spelling of the key is present
			logger.warn("Bib entry {} is missing publication month.".format(bibkey))
			logger.warn("\tPublication sort order may be affected. Please add a month entry")
		if month:
			# Month names go through MONTH_CONV, anything else must be numeric
			month = MONTH_CONV[month.lower()[0:3]] if month.isalpha() else int(month)

		# good enough to sort
		date = 365 * int(year)
		if month:
			date += 30 * (month - 1)
		self.date = date

		# Get values for content that starts out hidden
		self.hiddens = {}
		hiddens_add_bibtex(self.hiddens, bibkey, entry, raw_authors)
		hiddens_add_abstract(self.hiddens, bibkey, entry, raw_abstracts)

		# Add html-friendly entries; note this must be done *after* generating hiddens
		entry.fields['title-html'] = latex_to_html(entry.fields['title'])

		# "A", "A and B" or "A, B, and C"
		authors_html = latex_to_html(entry.fields['authors'])
		pieces = authors_html.split(',')
		if len(pieces) == 1:
			entry.fields['authors-html'] = authors_html
		elif len(pieces) == 2:
			entry.fields['authors-html'] = ' and'.join(pieces)
		else:
			entry.fields['authors-html'] = ','.join(pieces[:-1]) + ', and' + pieces[-1]

		# Book title, with the series appended in parentheses when present;
		# a missing key at any step simply stops here
		try:
			entry.fields['booktitle-html'] = latex_to_html(entry.fields['booktitle'])
			series = latex_to_html(entry.fields['series'])
			series = series.replace(' ', '&nbsp;')
			entry.fields['booktitle-html'] += ' (' + series + ')'
		except KeyError:
			pass

		if 'journal' in entry.fields:
			journal_html = latex_to_html(entry.fields['journal'])
			entry.fields['journal-html'] = journal_html
			if 'series' in entry.fields:
				series = latex_to_html(entry.fields['series'])
				series = series.replace(' ', '&nbsp;')
				entry.fields['journal-html'] += ' (' + series + ')'
			for required, complaint in (
					('volume', '{}: a volume key is required for journals'),
					('number', '{}: a number key (issue) is required for journals')):
				if required not in entry.fields:
					logger.error(complaint.format(bibkey))

			if 'booktitle' in entry.fields:
				logger.error('{} has a journal and booktitle entry.'.format(bibkey))
				logger.error('This is probably not what your want.')

		# Acceptance rate, only when both counts are present
		try:
			accepted = float(entry.fields['acceptance-accepted'])
			total = float(entry.fields['acceptance-total'])
			entry.fields['acceptance-percent'] = accepted / total * 100
		except KeyError:
			pass

		self.entry = entry
Exemplo n.º 4
0
	def __init__ (self, bibkey, entry, bibgroup, raw_authors, raw_abstracts):
		"""
		Build the publication record for one bib entry.

		bibkey        -- key of the bib entry; also the base name of the paper,
		                 source archive and talk files that are looked up.
		entry         -- the parsed bib entry; its `persons` and `fields`
		                 mappings are read and `fields` is extended in place.
		bibgroup      -- group name; must be a key of WORK_TYPES. Its first
		                 letter becomes the badge.
		raw_authors   -- handed to hiddens_add_bibtex.
		raw_abstracts -- handed to hiddens_add_abstract and stored as
		                 self.raw_abstract.

		Sets self.bibkey, self.raw_abstract, self.paths, self.missing_zip,
		self.date, self.hiddens and self.entry.

		Problems are reported through the module-level `logger`.
		NOTE(review): the code after logger.critical calls assumes that call
		does not return (e.g. `year` would be unbound); confirm.
		"""
		self.bibkey = bibkey
		self.raw_abstract = raw_abstracts

		# Plain comma separated author list
		authors = []
		if 'author' in entry.persons:
			for person in entry.persons['author']:
				authors.append(get_name(person))
		entry.fields['authors'] = ', '.join(authors)

		# Groups starting with 'p' or 'd' share the 'PD' badge
		entry.fields['badge'] = bibgroup[0].upper()
		if entry.fields['badge'] in ('P', 'D'):
			entry.fields['badge'] = 'PD'
		entry.fields['type'] = bibgroup
		entry.fields['display-type'] = WORK_TYPES[bibgroup]


		self.paths = {}

		# Try to copy the PDF to the content directory
		if os.path.exists(os.path.join('static', 'cv', bibkey + '.pdf')):
			cp(os.path.join('static', 'cv', bibkey + '.pdf'), LOCAL_CONTENT_DIR)
			self.paths['pdf'] = os.path.join(CONTENT_DIR, bibkey + '.pdf')
		else:
			if 'to-appear' in entry.fields and entry.fields['to-appear'] == '1':
				logger.warn('No PDF for "To Appear" paper {}'.format(bibkey))
			else:
				logger.critical_leader('Unable to find {}'.format(bibkey + '.pdf'))
				logger.critical_leader('\tYou need to add a copy of your paper to the cv/ direcotry')
				logger.critical('\tYour paper should be named the same as the key to the bib entry')

		# Try to copy the paper source to the content directory
		# (first archive format that exists wins)
		self.missing_zip = True
		for ext in ['.zip', '.tgz', '.tar.gz']:
			if os.path.exists(os.path.join('cv', bibkey + ext)):
				cp(os.path.join('cv', bibkey + ext), LOCAL_CONTENT_DIR)
				self.paths['tex_source'] = os.path.join(CONTENT_DIR, bibkey + ext)
				self.missing_zip = False
				break

		# Grab a ref to the talk if it exists
		if 'series' in entry.fields:
			# talk named after conference
			series_short = entry.fields['series'].lower().replace(' ', '').replace("'", '')
			if os.path.exists(os.path.join('static', 'talks', series_short+'.pptx')):
				self.paths['talk_pptx'] = '/talks/{}.pptx'.format(series_short)
			if os.path.exists(os.path.join('static', 'talks', series_short+'.pdf')):
				self.paths['talk_pdf'] = '/talks/{}.pdf'.format(series_short)
		# Fall back to the bibkey only when no series-named talk was found.
		# The keys stored above are 'talk_pptx' / 'talk_pdf'; testing for a
		# plain 'talk' key would always succeed and let the bibkey files
		# overwrite the series-named ones.
		if 'talk_pptx' not in self.paths and 'talk_pdf' not in self.paths:
			# talk named after bibkey
			if os.path.exists(os.path.join('static', 'talks', bibkey+'.pptx')):
				self.paths['talk_pptx'] = '/talks/{}.pptx'.format(bibkey)
			if os.path.exists(os.path.join('static', 'talks', bibkey+'.pdf')):
				self.paths['talk_pdf'] = '/talks/{}.pdf'.format(bibkey)

		# Possibly remove \url{} from the url entry if needed
		try:
			if entry.fields['conference-url'][0:5] == '\\url{':
				entry.fields['conference-url'] = entry.fields['conference-url'][5:-1]
		except KeyError:
			logger.warn("Unable to find conference URL for {}".format(bibkey))
			logger.warn('\tThis entry will be missing a link to the conference')

		# Possibly remove \url{} from the video link if needed
		# (a missing video link is not worth a warning)
		try:
			if entry.fields['video-url'][0:5] == '\\url{':
				entry.fields['video-url'] = entry.fields['video-url'][5:-1]
		except KeyError:
			pass

		# Construct the best date we can for this publication
		try:
			year = entry.fields['year']
		except KeyError:
			logger.critical_leader("Bib entry {} is missing publication year.".format(bibkey))
			logger.critical("\tPlease add a year entry and try again.")

		# The month may be stored under 'month' or 'mon'
		try:
			month = entry.fields['month']
		except KeyError:
			try:
				month = entry.fields['mon']
			except KeyError:
				month = None
				logger.warn("Bib entry {} is missing publication month.".format(bibkey))
				logger.warn("\tPublication sort order may be affected. Please add a month entry")
		if month:
			# Month names go through MONTH_CONV (keyed by the first three
			# lowercase letters), anything else must be numeric
			if month.isalpha():
				month = MONTH_CONV[month.lower()[0:3]]
			else:
				month = int(month)

		# good enough to sort
		self.date = 365 * int(year)
		if month:
			self.date += 30 * (month - 1)

		# Get values for content that starts out hidden
		self.hiddens = {}
		hiddens_add_bibtex(self.hiddens, bibkey, entry, raw_authors)
		hiddens_add_abstract(self.hiddens, bibkey, entry, raw_abstracts)

		# Add html-friendly entries; note this must be done *after* generating hiddens
		entry.fields['title-html']   = latex_to_html(entry.fields['title'])

		# "A", "A and B" or "A, B, and C"
		authors = latex_to_html(entry.fields['authors'])
		if len(authors.split(',')) == 1:
			entry.fields['authors-html'] = authors
		elif len(authors.split(',')) == 2:
			entry.fields['authors-html'] = ' and'.join(authors.split(','))
		else:
			authors = authors.split(',')
			authors.insert(len(authors)-1, ' and')
			entry.fields['authors-html'] = ','.join(authors[:-1]) + authors[-1]

		# Book title, with the series appended in parentheses when present
		try:
			entry.fields['booktitle-html'] = latex_to_html(entry.fields['booktitle'])
			try:
				series = latex_to_html(entry.fields['series'])
				series = series.replace(' ', '&nbsp;')
				entry.fields['booktitle-html'] += ' (' + series + ')'
			except KeyError:
				pass
		except KeyError:
			pass

		if 'journal' in entry.fields:
			entry.fields['journal-html'] = latex_to_html(entry.fields['journal'])
			if 'series' in entry.fields:
				series = latex_to_html(entry.fields['series'])
				series = series.replace(' ', '&nbsp;')
				entry.fields['journal-html'] += ' (' + series + ')'
			if 'volume' not in entry.fields:
				logger.error('{}: a volume key is required for journals'.format(bibkey))
			if 'number' not in entry.fields:
				logger.error('{}: a number key (issue) is required for journals'.format(bibkey))

		if 'journal' in entry.fields and 'booktitle' in entry.fields:
			logger.error('{} has a journal and booktitle entry.'.format(bibkey))
			logger.error('This is probably not what your want.')


		# An explicit acceptance-percent is converted to a float ...
		try:
			entry.fields['acceptance-percent'] =\
					float(entry.fields['acceptance-percent'])
		except KeyError:
			pass
		# ... and is replaced by the computed rate when both counts are given
		try:
			entry.fields['acceptance-percent'] =\
					float(entry.fields['acceptance-accepted']) /\
					float(entry.fields['acceptance-total']) * 100
		except KeyError:
			pass

		self.entry = entry