def _add_website(self):
    """Register the website for ``self.source`` if it is not stored yet.

    Looks up a ``Website`` row whose ``name`` equals ``self.source``.
    When the query returns nothing truthy, a new ``Website`` is built
    from ``self.source`` and ``get_base_url(self.url)`` and added to
    ``self.session``.  Nothing is committed or returned here.
    """
    existing = (
        self.session.query(Website)
        .filter_by(name=self.source)
        .first()
    )
    if existing:
        # A matching row is already there; leave the session untouched.
        return

    self.session.add(
        Website(name=self.source, url=get_base_url(self.url))
    )
def chapter_list(self):
    """Build the chapter list from the ``dd`` entries under ``#list``.

    Starts at the ``dt`` selected by ``dt:eq(1)`` inside ``#list`` and
    takes every following ``dd`` sibling.  Entries whose inner ``<a>``
    has no truthy ``href`` are dropped.

    Returns:
        The result of PyQuery ``map``: one ``(index, url, text)`` tuple
        per remaining entry, where ``url`` is the ``href`` joined onto
        ``utils.get_base_url(self.url)`` and ``text`` is the entry text.
    """
    def anchor_href(node):
        return PyQuery(node)('a').attr('href')

    # Callbacks keep the two-argument (index, element) shape of the
    # original lambdas.
    def has_href(i, e):
        return anchor_href(e)

    def to_entry(i, e):
        link = urljoin(utils.get_base_url(self.url), anchor_href(e))
        return (i, link, PyQuery(e).text())

    start = self.doc('#list')('dt:eq(1)')
    entries = start.next_all('dd')
    return entries.filter(has_href).map(to_entry)
def chapter_list(self):
    """Build the chapter list from the ``.mulu_list`` blocks.

    Inside ``.mu_contain:eq(1)`` the ``.mulu_list:last`` element is
    located, and the ``li`` items of all its preceding ``.mulu_list``
    siblings are collected.  Items whose inner ``<a>`` has no truthy
    ``href`` are dropped.

    Returns:
        The result of PyQuery ``map``: one ``(index, url, text)`` tuple
        per remaining item, where ``url`` is the ``href`` joined onto
        ``utils.get_base_url(self.url)`` and ``text`` is the item text.
    """
    def anchor_href(node):
        return PyQuery(node)('a').attr('href')

    # Callbacks keep the two-argument (index, element) shape of the
    # original lambdas.
    def has_href(i, e):
        return anchor_href(e)

    def to_entry(i, e):
        link = urljoin(utils.get_base_url(self.url), anchor_href(e))
        return (i, link, PyQuery(e).text())

    container = self.doc('.mu_contain:eq(1)')
    last_block = container('.mulu_list:last')
    items = last_block.prev_all('.mulu_list')('li')
    return items.filter(has_href).map(to_entry)
def chapter_list(self):
    """Build the chapter list from ``.list-charts:first li`` items.

    Items whose inner ``<a>`` has no truthy ``href`` are dropped.

    Returns:
        The result of PyQuery ``map``: one ``(index, url, title)`` tuple
        per remaining item, where ``url`` is the ``href`` joined onto
        ``utils.get_base_url(self.url)`` and ``title`` is the item text
        passed through ``self.refine``.
    """
    def anchor_href(node):
        return PyQuery(node)('a').attr('href')

    # Callbacks keep the two-argument (index, element) shape of the
    # original lambdas.
    def has_href(i, e):
        return anchor_href(e)

    def to_entry(i, e):
        link = urljoin(utils.get_base_url(self.url), anchor_href(e))
        title = self.refine(PyQuery(e).text())
        return (i, link, title)

    items = self.doc('.list-charts:first li')
    return items.filter(has_href).map(to_entry)
def _chapter_list_with_sel(self, selector, chap_type):
    """Build a chapter list from ``selector`` using a link-resolution mode.

    Elements matched by ``selector`` are kept only when their inner
    ``<a>`` has a truthy ``href``.  Each kept element becomes an
    ``(index, url, text)`` tuple; how ``url`` is derived from the
    ``href`` depends on ``chap_type``:

    * ``ChapterType.whole``    -- the ``href`` is used unchanged.
    * ``ChapterType.path``     -- joined onto ``get_base_url(self.url)``.
    * ``ChapterType.last``     -- joined onto ``self.url``.
    * ``ChapterType.noscheme`` -- passed to ``change_url_scheme``
      together with ``get_url_scheme(self.url)``.

    Args:
        selector: Selector handed to ``self.doc``.
        chap_type: One of the ``ChapterType`` members listed above.

    Returns:
        The result of PyQuery ``map`` over the filtered elements.

    Raises:
        NameError: With the message ``'chap_type'`` when ``chap_type``
            equals none of the four members.
    """
    def anchor_href(node):
        return PyQuery(node)('a').attr('href')

    # Callbacks keep the two-argument (index, element) shape of the
    # original lambdas.
    def has_href(i, e):
        return anchor_href(e)

    # Filtering happens before the mode is validated, as in the original.
    linked = self.doc(selector).filter(has_href)

    if chap_type == ChapterType.whole:
        def resolve(node):
            return anchor_href(node)
    elif chap_type == ChapterType.path:
        def resolve(node):
            return urljoin(get_base_url(self.url), anchor_href(node))
    elif chap_type == ChapterType.last:
        def resolve(node):
            return urljoin(self.url, anchor_href(node))
    elif chap_type == ChapterType.noscheme:
        def resolve(node):
            return change_url_scheme(
                anchor_href(node), get_url_scheme(self.url))
    else:
        raise NameError('chap_type')

    def to_entry(i, e):
        return (i, resolve(e), PyQuery(e).text())

    return linked.map(to_entry)
def chapter_list(self):
    """Build the chapter list from every ``td a`` anchor in the document.

    Unlike a filtered listing, no anchors are dropped here: every match
    of ``td a`` is mapped, whatever its ``href`` is.

    Returns:
        The result of PyQuery ``map``: one ``(index, url, text)`` tuple
        per anchor, where ``url`` is the anchor's ``href`` joined onto
        ``utils.get_base_url(self.url)`` and ``text`` is the anchor text.
    """
    # Callback keeps the two-argument (index, element) shape of the
    # original lambda.
    def to_entry(i, e):
        anchor = PyQuery(e)
        link = urljoin(utils.get_base_url(self.url), anchor.attr('href'))
        return (i, link, PyQuery(e).text())

    anchors = self.doc('td a')
    return anchors.map(to_entry)