def reset(self):
    """Reset the parser and clear the issue-tracking state.

    Runs ``ArticleParser.reset`` first, then clears ``issue`` and
    ``text_frame``, starts a fresh ``issues`` dict, and binds ``a_text``
    and ``issue_text`` to ``append_text``.
    """
    ArticleParser.reset(self)

    self.issue = None
    self.text_frame = None
    self.issues = {}

    # presumably per-frame text callbacks looked up by the base parser;
    # verify against ArticleParser
    for attr_name in ("a_text", "issue_text"):
        setattr(self, attr_name, self.append_text)
def end_div(self):
    """Handle a closing ``div`` according to the current ``text_frame``.

    The frame advances ``"author"`` -> ``"blank"`` -> ``"pages"``.  In the
    ``"pages"`` frame the collected text is searched for a run that starts
    with digits and ends at the literal ``PDF``; the numbers found in that
    run become the article's pages (one number is used as both start and
    end, two numbers are start and end).  When the text has no such run,
    the article is dropped by setting ``self.article`` to ``None``.

    Raises:
        HTMLException: if the page text does not contain exactly one or
            two numbers.
    """
    # NOTE(review): the indentation of this method was lost in the copy
    # under review; the final ArticleParser.end_div call is assumed to run
    # for every frame, not only for "pages" -- confirm against history.
    frame = self.text_frame
    if frame == "author":
        self.get_text()  # return value intentionally unused
        self.text_frame = "blank"
    elif frame == "blank":
        self.get_text()  # return value intentionally unused
        self.text_frame = "pages"
        self.a_frame = "pages"
    elif frame == "pages":
        text = self.get_text()
        section = re.search(r"(\d+.*)PDF", text, re.DOTALL)
        if not section:
            # just ignore this
            self.article = None
            self.a_frame = None
            self.text_frame = None
            ArticleParser.end_div(self)
            return

        text = section.group(1)
        # Build a real list: on Python 3 ``map`` returns an iterator,
        # which supports neither len() nor indexing.
        pages = [Page(number) for number in re.findall(r"\d+", text)]
        if len(pages) == 1:
            only_page = pages[0]
            self.article.set_pages(only_page, only_page)
        elif len(pages) == 2:
            start, end = pages
            self.article.set_pages(start, end)
        else:
            raise HTMLException("%s is not valid text input for APS parser" % text)

        self.text_frame = None
        self.a_frame = None

    ArticleParser.end_div(self)
def reset(self):
    """Reset the parser and rebind the paragraph and text attributes.

    After ``ArticleParser.reset``, ``start_p``/``end_p`` are pointed at
    ``start_div``/``end_div`` and ``title_text``/``citation_text`` are
    bound to ``append_text``.
    """
    ArticleParser.reset(self)

    # treat paragraph breaks as divisions
    self.start_p, self.end_p = self.start_div, self.end_div

    for attr_name in ("title_text", "citation_text"):
        setattr(self, attr_name, self.append_text)
def reset(self):
    """Reset the parser and start with empty citation and title lists.

    After ``ArticleParser.reset``, ``start_p``/``end_p`` are pointed at
    ``start_div``/``end_div`` and ``citation`` and ``title`` are each set
    to a new, separate empty list.
    """
    ArticleParser.reset(self)

    # treat paragraph breaks as divisions
    self.start_p, self.end_p = self.start_div, self.end_div

    # two independent lists -- they must not share one object
    self.citation, self.title = [], []
def reset(self):
    """Reset the parser and rebind the paragraph and text attributes.

    After ``ArticleParser.reset``, ``start_p``/``end_p`` are pointed at
    ``start_div``/``end_div`` and the ``pages_text``, ``title_text``,
    ``author_text`` and ``blank_text`` attributes are bound to
    ``append_text``.
    """
    ArticleParser.reset(self)

    # treat paragraph breaks as divisions
    self.start_p, self.end_p = self.start_div, self.end_div

    for attr_name in ("pages_text", "title_text", "author_text", "blank_text"):
        setattr(self, attr_name, self.append_text)
def reset(self):
    """Reset the parser via ``ArticleParser.reset`` and clear ``url``."""
    ArticleParser.reset(self)

    # no URL is known until one is assigned elsewhere
    self.url = None
def reset(self):
    """Reset the parser and bind the text attributes to ``append_text``.

    After ``ArticleParser.reset``, ``title_text``, ``pages_text`` and
    ``citation_text`` all refer to ``append_text``.
    """
    ArticleParser.reset(self)

    for attr_name in ("title_text", "pages_text", "citation_text"):
        setattr(self, attr_name, self.append_text)
def reset(self):
    """Reset the parser, clear ``url`` and bind the text attributes.

    After ``ArticleParser.reset``, ``url`` is set to ``None`` and
    ``page_text`` and ``a_text`` are bound to ``append_text``.
    """
    ArticleParser.reset(self)

    self.url = None

    for attr_name in ("page_text", "a_text"):
        setattr(self, attr_name, self.append_text)