def journal_id(self):
    """
    Look up the journal id in the parsed document.

    Returns: str
    """
    selector = 'journal-id'
    return get_text(self.xml, selector)
def article_title(self):
    """
    Look up the article title in the parsed document.

    Returns: str
    """
    selector = 'article-title'
    return get_text(self.xml, selector)
def journal_title(self):
    """
    Look up the journal title in the parsed document.

    Returns: str
    """
    selector = 'journal-title'
    return get_text(self.xml, selector)
def publisher_name(self):
    """
    Look up the publisher name in the parsed document.

    Returns: str
    """
    selector = 'publisher-name'
    return get_text(self.xml, selector)
def volume(self):
    """
    Look up the volume number in the parsed document.

    Returns: str
    """
    selector = 'volume'
    return get_text(self.xml, selector)
def article_id(self):
    """
    Look up the article id in the parsed document.

    Returns: str
    """
    selector = 'article-id'
    return get_text(self.xml, selector)
def issue(self):
    """
    Look up the issue number in the parsed document.

    Returns: str
    """
    selector = 'issue'
    return get_text(self.xml, selector)
def pub_date(self):
    """
    Assemble the publication date in ISO format.

    The year, month and day are read via `get_text` using the
    `pub-date year`, `pub-date month` and `pub-date day` selectors,
    converted to integers and combined into a `datetime.date`.

    Returns: str in `YYYY-MM-DD` form, or None when any part is missing,
    non-numeric, or does not form a valid calendar date.
    """
    try:
        date = datetime.date(
            int(get_text(self.xml, 'pub-date year')),
            int(get_text(self.xml, 'pub-date month')),
            int(get_text(self.xml, 'pub-date day')),
        )
        return date.isoformat()

    # Best-effort: any parsing/validation failure yields None. Catch
    # `Exception` rather than using a bare `except` so KeyboardInterrupt
    # and SystemExit still propagate.
    except Exception:
        return None
def surname(self):
    """
    Get the surname of the first author.

    Returns: str, or None when no `contrib` element is selected.
    """
    contribs = self.xml.select('contrib')

    # Nothing to report without at least one contributor.
    if not contribs:
        return None

    first_contrib = contribs[0]
    return get_text(first_contrib, 'surname')
def pagination(self):
    """
    Construct the page range from the `fpage` and `lpage` values.

    Returns: str of the form "fpage-lpage" when both are present, the
    single available value when only one is, or None when neither is.
    """
    first = get_text(self.xml, 'fpage')
    last = get_text(self.xml, 'lpage')

    # No first page: fall back to the last page alone, if there is one.
    if not first:
        return last if last else None

    # First page only.
    if not last:
        return first

    return '-'.join([first, last])
def authors(self):
    """
    Collect a display name for each `contrib` element.

    Returns: list of str, each formatted "surname, given-names" when both
    parts are present, or just the surname when there are no given names.
    Contributors without a surname are left out.
    """
    names = []

    for contrib in self.xml.select('contrib'):

        # Query for name parts.
        first = get_text(contrib, 'given-names')
        last = get_text(contrib, 'surname')

        # A surname is the minimum needed to list a contributor.
        if not last:
            continue

        # Merge into a single string when given names are available.
        names.append(', '.join([last, first]) if first else last)

    return names
def test_get_text(tag, text):
    """
    Check that `get_text` extracts the expected value from a markup snippet.

    Args:
        tag (str): Markup to parse with the `lxml` parser.
        text: Value `get_text` is expected to return for the `tag` selector.
    """
    parsed = BeautifulSoup(tag, 'lxml')
    extracted = get_text(parsed, 'tag')
    assert extracted == text