def _get_article_title(tree):
    """
    Given an ElementTree, returns article title.

    The title is read from ``front/article-meta/title-group/article-title``.
    If that element is absent, the first
    ``front/article-meta/article-categories/subj-group/subject`` element is
    used instead.  The text of the chosen element and of all its descendants
    is joined with single spaces.

    Returns an empty string when neither element exists, matching the other
    article getters in this module that return '' for missing data.
    """
    article = ElementTree(tree)
    title = article.find('front/article-meta/title-group/article-title')
    # Test against None explicitly: with xml.etree an element that has no
    # child elements is falsy, so a plain truth test would be wrong here.
    if title is None:
        title = article.find(
            'front/article-meta/article-categories/subj-group/subject')
    if title is None:
        # Neither a title nor a subject is present.
        return ''
    return ' '.join(title.itertext())
def _get_article_abstract(tree):
    """
    Given an ElementTree, returns article abstract.

    The abstract is the text of the ``front/article-meta/abstract`` element
    and all of its descendants, joined with single spaces.  An empty string
    is returned when the article has no abstract element.
    """
    abstract_node = ElementTree(tree).find('front/article-meta/abstract')
    if abstract_node is None:
        return ''
    text_fragments = abstract_node.itertext()
    return ' '.join(text_fragments)
def _get_article_license_url(tree):
    """
    Given an ElementTree, returns article license URL.

    The license element is looked up at
    ``front/article-meta/permissions/license``:

    * If it carries an ``xlink:href`` attribute, that value is returned.
    * If the element is missing, an empty string is returned.
    * If it only contains plain text, the UTF-8 encoded text is looked up in
      the module-level ``license_url_equivalents`` mapping and the mapped
      URL is returned.
    * If the plain text is not in that mapping, the statement is written to
      ``/tmp/pmc-<md5 of the text>``, reported on stderr, and an empty
      string is returned.
    """
    license_elem = ElementTree(tree).find(
        'front/article-meta/permissions/license')
    try:
        return license_elem.attrib['{http://www.w3.org/1999/xlink}href']
    except AttributeError:  # license statement is missing
        return ''
    except KeyError:  # license statement is in plain text
        license_text = ' '.join(license_elem.itertext()).encode('utf-8')

    if license_text in license_url_equivalents:
        return license_url_equivalents[license_text]

    # FIXME: revert this to an exception some time in the future
    filename = '/tmp/pmc-' + md5(license_text).hexdigest()
    # license_text is encoded bytes, so the dump file is opened in binary
    # mode; text mode rejects bytes on Python 3.
    with open(filename, 'wb') as f:
        f.write(license_text)
    stderr.write("Unknown license statement:\n%s\n" % str(license_text))
    # Return the same "no URL" value as for a missing license statement
    # instead of implicitly returning None.
    return ''