def getChapterUrls(self):
    """Return the chapter URLs for this series, naturally sorted by chapter number.

    Scans every <a> element on the series page for hrefs matching
    ``//<domain>/manga/<name>/...``, prefixes each with "http:" and
    de-duplicates while preserving first-seen order.

    Returns:
        list of str: absolute chapter URLs.

    Raises:
        ComicEngine.ComicError: if no matching chapter links are found.
    """
    feedback.debug("domain: " + str(self.domain))
    doc = self.getDomObject()
    # Hoist the compile out of the loop; the pattern is loop-invariant.
    chapter_pattern = re.compile('(//%s/manga/%s/[^"]+)' % (self.domain, self.name))
    urls = []
    for item in doc.cssselect("a"):
        # Skip anchors without an href attribute.
        if "href" not in item.attrib:
            continue
        m = chapter_pattern.match(item.attrib["href"])
        if not m:
            continue
        target_url = "http:" + m.group(1)
        if target_url not in urls:
            urls.append(target_url)
    if not urls:
        raise ComicEngine.ComicError("No URLs returned from %s" % self.url)
    util.naturalSort(urls, ".+/c([0-9.]+)/")
    # I've seen one series which was a load of "chapter 1" in different
    # volumes... how to deal with that?
    feedback.debug(urls)
    return urls
def getPageUrls(self):
    """Return the URL of every page in this chapter, naturally sorted by page number."""
    doc = self.getDomObject()
    option_values = [opt.attrib["value"] for opt in doc.cssselect("#selectpage select option")]
    urls = ["http://%s%s" % (self.domain, value) for value in option_values]
    util.naturalSort(urls, ".+/([0-9]+)$")
    return urls
def getChapterUrls(self):
    """Return chapter URLs built from links inside the #chapterlist table."""
    doc = self.getDomObject()
    anchors = doc.get_element_by_id("chapterlist").cssselect("tr td a")
    urls = []
    for anchor in anchors:
        # Keep only links whose path ends in /<series-name>/<chapter-number>.
        number = util.regexGroup("/%s/([0-9.]+)$" % self.name, anchor.attrib["href"])
        if number:
            urls.append("%s/%s" % (self.url, number))
    util.naturalSort(urls)
    return urls
def getChapterUrls(self):
    """Return absolute chapter URLs for this series, naturally sorted by chapter number."""
    dom = self.getDomObject()
    anchors = dom.get_element_by_id("chapterlist").cssselect("td a")
    wanted = re.compile("/%s/[0-9.]+" % self.name)
    urls = [
        "https://%s%s" % (self.domain, anchor.attrib['href'])
        for anchor in anchors
        if wanted.match(anchor.attrib['href'])
    ]
    util.naturalSort(urls, ".+/([0-9.]+)$")
    return urls
def getChapterUrls(self):
    """Return chapter URLs for this series on readms.net, naturally sorted.

    Collects every table link whose path starts with ``/r/<series-name>``
    and makes it absolute against https://readms.net.

    Returns:
        list of str: absolute chapter URLs.
    """
    dom = self.getDomObject()
    alinks = dom.cssselect("table tr td a")
    chaplinks = []
    for elem_a in alinks:
        href = elem_a.attrib['href']
        if re.match("/r/%s" % self.name, href):
            chaplinks.append("https://readms.net" + href)
    # Bug fix: the series name was never interpolated, so the sort pattern
    # contained a literal "%s" and could not extract the chapter number.
    util.naturalSort(chaplinks, ".+/%s/([0-9.]+)/.+" % self.name)
    return chaplinks
def getPageUrls(self):
    """Return page URLs for this chapter, taken from the page-select dropdown."""
    doc = self.getDomObject()
    container = doc.get_element_by_id("top_chapter_list").getparent()
    urls = []
    for option in container.cssselect("select option"):
        label = option.text_content()
        # Only purely numeric option labels name real pages.
        if re.match("^[0-9]+$", label):
            urls.append("%s/%i.html" % (self.url, int(label)))
    util.naturalSort(urls)
    return urls
def getChapterUrls(self):
    """Return chapter URLs matching the //host/manga/<series>/c<number>/ scheme."""
    doc = self.getDomObject()
    urls = []
    for anchor in doc.cssselect("li span a"):
        link = anchor.attrib.get("href")
        if link is None:
            continue
        if re.match("//.+?/manga/[^/]+/c[0-9.]+/", link):
            urls.append("http:" + link)
    util.naturalSort(urls)
    return urls
def getPageUrls(self):
    """Return page URLs for this chapter from the top-bar page selector.

    Keeps only <option> values that are positive integers and builds
    ``<base-chapter-url>/<page>.html`` for each; the result is naturally
    sorted by page number.

    Returns:
        list of str: absolute page URLs.
    """
    doc = self.getDomObject()
    top_bar = doc.get_element_by_id("top_bar")
    base_url = self.getBaseChapterUrl()
    urls = []
    for option in top_bar.cssselect("option"):
        value = option.attrib["value"]
        # Non-numeric values are navigation placeholders, not pages.
        if not re.match("^[0-9]+$", value):
            continue
        page = int(value)
        # Page 0 (and below) is a "featured"/placeholder entry — skip it.
        if page < 1:
            continue
        urls.append("%s/%i.html" % (base_url, page))
    util.naturalSort(urls, ".+/([0-9.]+)\\.html")
    return urls