def _fetch_next_page(prev_link, content):
    """Find the link to the page that follows ``prev_link`` inside ``content``.

    The current page number is read from the ``page`` query parameter of
    ``prev_link``. ``content`` is then searched for an anchor of the form
    ``<a href=VALUE>N</a>`` where ``N`` is the current page number plus one
    and ``VALUE`` contains neither ``"`` nor ``>``.

    :param prev_link: URL of the page that produced ``content``.
    :param content: text to search for the next-page anchor.
    :returns: ``''`` when ``prev_link`` has no (or an empty) ``page``
        parameter; otherwise the result of
        ``patch_url(prev_link, href)``, where ``href`` is the captured
        attribute value or ``''`` if no anchor matched.
    :raises ValueError: if the ``page`` parameter is not an integer.
    """
    params = query2dict(urlparse.urlparse(prev_link).query)
    current = params.get('page', '')

    # Without a page number there is nothing to increment.
    if not current:
        return ''

    following = str(int(current) + 1)
    pattern = r'<a href=(?P<next>[^">]*)>' + following + r'</a>'
    found = re.search(pattern, content)
    href = found.group('next') if found else ''

    # patch_url is called even when no anchor matched (href == '').
    # NOTE(review): presumably it resolves href against prev_link -- confirm.
    return patch_url(prev_link, href)
def _fetch_next_page(prev_link, content):
    """Return the patched URL of the page after ``prev_link``, or ``''``.

    Reads the ``page`` query parameter from ``prev_link``, adds one, and
    looks in ``content`` for ``<a href=VALUE>`` immediately followed by
    that number and ``</a>``. The captured ``VALUE`` (or ``''`` when the
    search fails) is passed, together with ``prev_link``, to ``patch_url``.

    Returns ``''`` without searching when ``prev_link`` carries no
    non-empty ``page`` parameter. Raises ``ValueError`` if the parameter
    cannot be converted with ``int``.
    """
    query_string = urlparse.urlparse(prev_link).query
    page_value = query2dict(query_string).get('page', '')
    if not page_value:
        return ''

    next_number = int(page_value) + 1
    anchor = re.search(
        r'<a href=(?P<next>[^">]*)>' + str(next_number) + r'</a>',
        content,
    )
    if anchor:
        target = anchor.group('next')
    else:
        # No matching anchor: fall through with an empty target.
        target = ''

    # NOTE(review): patch_url is defined elsewhere; its handling of an
    # empty target is not visible here -- confirm.
    return patch_url(prev_link, target)
def _is_over_limit(self, link, limit):
    """Compare the ``p`` query parameter of ``link``, scaled by ``self.pmax``, to ``limit``.

    :param link: URL whose query string contains a ``p`` parameter.
    :param limit: upper bound to compare against.
    :returns: ``True`` when ``int(p) * int(self.pmax) <= limit``,
        ``False`` otherwise.
    :raises KeyError: if the query has no ``p`` entry.
    :raises ValueError: if ``p`` or ``self.pmax`` is not an integer.

    NOTE(review): despite the name, ``True`` means the product has NOT
    exceeded ``limit`` -- confirm this matches what callers expect.
    """
    params = query2dict(urlparse.urlparse(link).query)
    page = int(params["p"])
    per_page = int(self.pmax)
    return page * per_page <= limit
def _is_over_limit(self, link, limit):
    """Compare the ``pn`` query parameter of ``link``, plus ``self.pmax``, to ``limit``.

    :param link: URL whose query string contains a ``pn`` parameter.
    :param limit: upper bound to compare against.
    :returns: ``True`` when ``int(pn) + int(self.pmax) <= limit``,
        ``False`` otherwise.
    :raises KeyError: if the query has no ``pn`` entry.
    :raises ValueError: if ``pn`` or ``self.pmax`` is not an integer.

    NOTE(review): despite the name, ``True`` means the sum has NOT
    exceeded ``limit`` -- confirm this matches what callers expect.
    """
    params = query2dict(urlparse.urlparse(link).query)
    offset = int(params['pn'])
    step = int(self.pmax)
    return offset + step <= limit