def get_references(self, element):
    """Collect the article references from the raw page source.

    Scielo brackets every reference with ``<!-- ref -->`` and
    ``<!-- end-ref -->`` comments, so the text between each pair is pulled
    straight out of ``self.wd.page_source`` and then handed to
    ``self._clean_refs`` (expected to strip the leftover HTML).

    Args:
        element: Not used; the whole page source is searched instead.

    Returns:
        A ``mining.TagList`` of the cleaned references, tagged
        ``"reference"``.
    """
    # NOTE(review): without re.DOTALL a reference spanning several lines
    # is not matched -- confirm Scielo always emits each one on one line.
    page_html = self.wd.page_source
    raw_matches = re.findall(r"<!-- ref -->(.*?)<!-- end-ref -->", page_html)
    return mining.TagList(self._clean_refs(raw_matches), "reference")
def get_keywords(self, element):
    """Collect the article keywords found at a page element.

    Args:
        element (:obj:`centaurminer.Element`): Page element to read the
            keywords from.

    Returns:
        A ``mining.TagList`` of the parsed keywords tagged ``"keyword"``,
        or ``None`` when parsing yields nothing truthy.
    """
    parsed = self.__parse_keywords(self.get(element))
    if not parsed:
        return None
    return mining.TagList(parsed, "keyword")
def get_organization(self, element):
    """Build the organization tag list for a page element.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the fetched values, tagged
        ``'organization'``.
    """
    organizations = self.get(element, several=True)
    return mining.TagList(organizations, tag='organization')
def get_authors(self, element):
    """Build the author tag list for a page element.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the fetched values, tagged ``'author'``.
    """
    author_entries = self.get(element, several=True)
    return mining.TagList(author_entries, tag='author')
def get_organization_affiliated(self, element):
    """Gather every `organizations_affiliated` field into one tag list.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the fetched values, tagged ``'org'``.
    """
    affiliations = self.get(element, several=True)
    return mining.TagList(affiliations, tag='org')
def get_authors(self, element):
    """Gather every `author` field into one tag list.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the fetched values, tagged ``'author'``.
    """
    found_authors = self.get(element, several=True)
    return mining.TagList(found_authors, tag='author')
def get_references(self, element):
    """Build the reference tag list for a page element.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the fetched values, tagged ``'ref'``.
    """
    reference_entries = self.get(element, several=True)
    return mining.TagList(reference_entries, tag='ref')
def get_authors(self, element):
    """Build the author tag list, formatted and without duplicates.

    Each value fetched for ``element`` is passed through
    ``self.__format_author``; repeated results are then dropped while the
    order of first appearance is kept.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the unique formatted authors, tagged
        ``"author"``.
    """
    format_author = self.__format_author
    formatted = [
        format_author(raw_author)
        for raw_author in self.get(element, several=True)
    ]
    # dict.fromkeys keeps only the first occurrence of each key, in order.
    unique_authors = list(dict.fromkeys(formatted))
    return mining.TagList(unique_authors, "author")
def get_organization_affiliated(self, element):
    """Build the tag list of the authors' affiliated organizations.

    Only the text after the last ``</sup>`` in each fetched value is kept;
    a value without ``</sup>`` is kept whole.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the trimmed values, tagged ``"orgs"``.
    """
    trimmed = []
    for raw_org in self.get(element, several=True):
        # rpartition yields the whole string as the tail when the
        # separator is absent, matching split(...)[-1].
        _, _, tail = raw_org.rpartition('</sup>')
        trimmed.append(tail)
    return mining.TagList(trimmed, "orgs")
def get_organization_affiliated(self, element):
    """Build the affiliated-organization tag list for a page element.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the fetched values, tagged ``"org"``.
    """
    return mining.TagList(self.get(element, several=True), "org")
def get_title_translated(self, element):
    """Build the translated-title tag list for a page element.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the fetched values, tagged ``"title"``.
    """
    return mining.TagList(self.get(element, several=True), "title")
def get_organization_affiliated(self, element):
    """Build the affiliated-organization tag list, skipping duplicates.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of every second fetched value (indices
        0, 2, 4, ...), tagged ``"org"``.
    """
    doubled = self.get(element, several=True)
    # Every organization appears twice in this page format, so only the
    # even-indexed entries are kept (assumes the duplicates are adjacent).
    return mining.TagList(doubled[::2], "org")
def get_references(self, element):
    """Build the cleaned reference tag list for a page element.

    Args:
        element: Page element passed to ``self.get`` with ``several=True``.

    Returns:
        A ``mining.TagList`` of the values returned by
        ``self._clean_refs``, tagged ``"reference"``.
    """
    cleaned = self._clean_refs(self.get(element, several=True))
    return mining.TagList(cleaned, "reference")