Example #1
    def getChapterUrls(self):
        feedback.debug("domain: " + str(self.domain))

        doc = self.getDomObject()
        obj_a = doc.cssselect("a")

        urls = []
        for item in obj_a:
            if not "href" in item.attrib.keys():
                continue
            m = re.match(
                """(//%s/manga/%s/[^"]+)""" % (self.domain, self.name),
                item.attrib["href"])
            if not m:
                continue
            target_url = "http:" + m.group(1)
            if target_url not in urls:
                urls.append(target_url)

        if len(urls) < 1:
            raise ComicEngine.ComicError("No URLs returned from %s" % self.url)

        util.naturalSort(urls, ".+/c([0-9.]+)/")
        # I've seen one series that was just a load of "chapter 1" entries across different volumes... how to deal with that?
        feedback.debug(urls)
        return urls
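
These examples all hand their URL lists to util.naturalSort, whose definition is not part of the excerpts. Judging from the call sites, it sorts the list in place and accepts an optional regex whose first capture group supplies a numeric sort key. A minimal sketch under those assumptions (the fallback behaviour for calls without a pattern is likewise assumed):

    import re

    def naturalSort(items, pattern=None):
        # Hypothetical sketch of util.naturalSort: sort `items` in place.
        # When `pattern` is given, its first capture group is parsed as a
        # number and used as the sort key (non-matching items sort last).
        # Otherwise each string is split into text/number runs, so that e.g.
        # ".../c2/" orders before ".../c10/".
        def key(s):
            if pattern:
                m = re.match(pattern, s)
                return float(m.group(1)) if m else float("inf")
            return [float(tok) if i % 2 else tok
                    for i, tok in enumerate(re.split(r"([0-9]+(?:\.[0-9]+)?)", s))]
        items.sort(key=key)
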
Example #2
    def getPageUrls(self):
        doc = self.getDomObject()
        options = doc.cssselect("#selectpage select option")
        urls = []

        for opt in options:
            path = opt.attrib["value"]
            urls.append("http://%s%s" % (self.domain, path))

        util.naturalSort(urls, ".+/([0-9]+)$")

        return urls
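
Each method also starts from self.getDomObject(), which the excerpts do not define. A minimal sketch of what it presumably does, assuming the engine keeps the page address in self.url and fetches it with requests before parsing with lxml.html (both assumptions, not confirmed by the source):

    import lxml.html
    import requests

    def getDomObject(self):
        # Hypothetical sketch: fetch self.url and parse the HTML into an lxml
        # tree so callers can use cssselect() and get_element_by_id() on it.
        # (cssselect() additionally requires the `cssselect` package.)
        response = requests.get(self.url)
        response.raise_for_status()
        return lxml.html.fromstring(response.content)
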
Example #3
    def getChapterUrls(self):
        doc = self.getDomObject()
        chapters = doc.get_element_by_id("chapterlist").cssselect("tr td a")
        urls = []

        for chap in chapters:
            link = chap.attrib["href"]
            chapter_num = util.regexGroup("/%s/([0-9.]+)$" % self.name, link)
            if chapter_num:
                urls.append("%s/%s" % (self.url, chapter_num))

        util.naturalSort(urls)

        return urls
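
Example #3 also relies on util.regexGroup, which is not shown. From its use above, it appears to return the first capture group when the pattern matches and something falsy otherwise; a minimal sketch under that assumption:

    import re

    def regexGroup(pattern, text):
        # Hypothetical sketch of util.regexGroup: return the first capture
        # group if `pattern` is found in `text`, otherwise None.
        m = re.search(pattern, text)
        return m.group(1) if m else None
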
Example #4
    def getChapterUrls(self):
        dom = self.getDomObject()
        chapters = dom.get_element_by_id("chapterlist").cssselect("td a")

        urls = []

        for elem_a in chapters:
            path = elem_a.attrib['href']
            if re.match("/%s/[0-9.]+" % self.name, path):
                urls.append("https://%s%s" % (self.domain, path))

        util.naturalSort(urls, ".+/([0-9.]+)$")

        return urls
Example #5
    def getChapterUrls(self):
        dom = self.getDomObject()
        alinks = dom.cssselect("table tr td a")

        chaplinks = []

        for elem_a in alinks:
            href = elem_a.attrib['href']
            if re.match("/r/%s" % self.name, href):
                chaplinks.append("https://readms.net"+href)

        util.naturalSort(chaplinks, ".+/%s/([0-9.]+)/.+" % self.name)

        return chaplinks
Example #6
    def getPageUrls(self):
        doc = self.getDomObject()
        div = doc.get_element_by_id("top_chapter_list").getparent()
        options = div.cssselect("select option")
        urls = []

        for opt in options:
            pagenum = opt.text_content()

            if re.match("^[0-9]+$", pagenum):
                urls.append("%s/%i.html" % (self.url, int(pagenum)))

        util.naturalSort(urls)

        return urls
Example #7
    def getChapterUrls(self):
        doc = self.getDomObject()
        chapter_links = doc.cssselect("li span a")
        urls = []

        for a in chapter_links:
            if "href" not in a.attrib.keys():
                continue

            link = a.attrib['href']
            if re.match("//.+?/manga/[^/]+/c[0-9.]+/", link):
                urls.append("http:" + link)

        util.naturalSort(urls)

        return urls
Example #8
    def getPageUrls(self):

        doc = self.getDomObject()
        top_bar = doc.get_element_by_id("top_bar")
        #options = top_bar.get_elements_by_tag_name("option")
        options = top_bar.cssselect("option")
        urls = []
        base_url = self.getBaseChapterUrl()

        for option in options:
            m = re.match("^[0-9]+$", option.attrib["value"])
            if not m:
                continue
            v = int(m.group(0))
            if v < 1:
                continue

            urls.append("%s/%i.html" % (base_url, v))

        util.naturalSort(urls, ".+/([0-9.]+)\\.html")
        return urls
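
Example #8 builds its page URLs on top of self.getBaseChapterUrl(), which is also absent from the excerpts. One plausible reading, assuming self.url may already point at an individual page, is that it simply strips a trailing "/<n>.html" component; this sketch is an assumption, not the engine's actual method:

    import re

    def getBaseChapterUrl(self):
        # Hypothetical sketch: treat self.url as the chapter URL, dropping a
        # trailing "/<n>.html" page component if one is present.
        return re.sub(r"/[0-9]+\.html$", "", self.url)
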