Example #1
    def getChapterUrls(self):
        feedback.debug("domain: " + str(self.domain))

        doc = self.getDomObject()
        obj_a = doc.cssselect("a")

        urls = []
        for item in obj_a:
            if not "href" in item.attrib.keys():
                continue
            m = re.match(
                """(//%s/manga/%s/[^"]+)""" %
                (re.escape(self.domain), re.escape(self.name)),
                item.attrib["href"])
            if not m:
                continue
            target_url = "http:" + m.group(1)
            if target_url not in urls:
                urls.append(target_url)

        if len(urls) < 1:
            raise ComicEngine.ComicError("No URLs returned from %s" % self.url)

        util.naturalSort(urls, ".+/c([0-9.]+)/")
        # I've seen one series that was a load of "chapter 1" entries across different volumes... how to deal with that?
        feedback.debug(urls)
        return urls
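For context, here is a minimal sketch of the natural sort used above. The signature and in-place behaviour of util.naturalSort are assumptions, not taken from the source:

import re

def naturalSort(items, pattern):
    # Assumed behaviour: sort in place by the numeric capture group,
    # so that "c10" orders after "c2"; non-matching items sort first.
    def chapterKey(item):
        m = re.match(pattern, item)
        return float(m.group(1)) if m else 0.0
    items.sort(key=chapterKey)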
Example #2
    def load(self):
        """ Loads the page data if not yet already done.

        On 500-class errors and connection resets, retries up to 3 times.

        Class implementors should not need to call this method.
        """
        if self.pagedata is None:
            global useragent
            retries = 3
            while retries > 0:
                feedback.debug(self.url)
                try:
                    req = urllib.request.Request(
                        self.url, data=None, headers={'User-Agent': useragent})
                    self.response = urllib.request.urlopen(req)
                    self.pagedata = self.response.read()
                    self.response.close()

                    if self.pagedata is not None:
                        self.decompress()
                        feedback.debug("Successfully downloaded %s" % self.url)
                        return
                    else:
                        raise ComicEngine.ComicError("No data obtained!")
                except ConnectionResetError as e:
                    # Decrement first, so the final failure falls through to
                    # the raise instead of exiting the loop silently
                    retries -= 1
                    if retries > 0:
                        print("Peer reset connection - retrying ...")
                        time.sleep(2)
                        continue

                    raise DownloadError(
                        "Could not load %s\n%s" % (self.url, str(e)), self.url)

                except urllib.error.HTTPError as e:
                    if httpCodeClass(e.code) == 500:
                        retries -= 1
                        if retries > 0:
                            feedback.warn(
                                "# HTTP %i error - retrying %i times ..." %
                                (e.code, retries))
                            time.sleep(2)
                            continue
                    if httpCodeClass(e.code) == 400:
                        raise DownloadError("Request error: %i" % e.code,
                                            self.url, e.code)

                    raise DownloadError(
                        "Could not load %s\n%s" % (self.url, str(e)), self.url,
                        e.code)
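httpCodeClass is not defined in these examples; a plausible sketch consistent with its use above (an assumption, not the source's definition) rounds a status code down to its class:

def httpCodeClass(code):
    # e.g. 503 -> 500, 404 -> 400, 200 -> 200
    return (code // 100) * 100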
Example #3
def getParentDir(path):
    """ Get the parent directory of a path

    Returns an absolute path
    """
    abspath = os.path.abspath(path)
    i = abspath.rfind(os.path.sep)

    if i == 0:
        # Parent is the filesystem root itself
        return os.path.sep
    elif i < 0:
        raise ValueError("Nothing above root")

    feedback.debug("Containing dir: %s"%abspath[:i])
    return abspath[:i]
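On POSIX paths this matches the standard library's behaviour, which is worth knowing when reading the call sites:

import os.path

# Both produce the absolute parent directory, e.g. "/tmp" for "/tmp/comic":
assert getParentDir("/tmp/comic") == os.path.dirname(os.path.abspath("/tmp/comic"))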
Example #4
def main():
    global step_delay
    global ch_start
    global ch_end
    global dlstate
    global cbzdl_version

    print("cbzdl v.%s" % cbzdl_version)

    args = parseArguments()
    feedback.debug_mode = args.verbose

    checkSpecialCases(args.url)

    dlstate = state.DownloaderState(args.url)
    checkState(args)

    ch_start = args.start
    ch_end   = args.end

    initializeState()

    failed = []  # stays empty if downloadComic is never reached

    try:
        cengine = dlstate.cengine
        comic_url = dlstate.get("url")

        if args.delay >= 0:
            step_delay = args.delay

        elif 'recommended_delay' in dir(cengine):
            step_delay = cengine.recommended_delay

        else:
            step_delay = 1

        feedback.debug("Delay chosen: %i" % step_delay)

        failed = downloadComic(cengine, comic_url, args)

    except ComicEngine.ComicError as e:
        feedback.fail(str(e))

    if failed:
        feedback.error("Failed:")
        for chapter in failed:
            feedback.error("# %s"%chapter )

        dlstate.set("failed_chapters", failed)
Example #5
    def getPageUrls(self):

        base_chapter_url = util.regexGroup(
            "https://readms.net(/r/.+)/[0-9.]+$", self.url)
        feedback.debug("Base URL: " + base_chapter_url)

        dom = self.getDomObject()

        pageurls = []

        links = dom.cssselect("ul.dropdown-menu li a")

        for elem_a in links:
            if 'href' not in elem_a.attrib:
                continue
            href = elem_a.attrib['href']
            # The captured path is a literal prefix, so match it as one
            if href.startswith(base_chapter_url):
                pageurls.append("https://readms.net" + href)

        return pageurls
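util.regexGroup appears throughout these examples without being shown; a minimal sketch consistent with its call sites (an assumption, not the source's implementation):

import re

def regexGroup(pattern, text):
    # Assumed behaviour: return the first capture group, or None on no match.
    m = re.match(pattern, text)
    return m.group(1) if m else None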
Example #6
    def __init__(self, stated_source):
        state_file_name = "state.data"
        feedback.debug("Source: %s" % stated_source)

        self.__state_data = None

        if os.path.isdir(stated_source):
            self.__state_file = os.path.join(stated_source, state_file_name)
            self.load()

            if not self.has("url"):
                self.set("url", getOldSourceUrl(stated_source))

            self.cengine = ComicEngine.determineFrom(self.get("url"))

        else:
            self.cengine = ComicEngine.determineFrom(stated_source)
            feedback.debug("Comic engine: %s" % self.cengine.__name__)

            comic_dir = self.cengine.Comic(stated_source).getComicLowerName()
            feedback.debug('Comic dir: %s' % comic_dir)

            self.__state_file = os.path.join(comic_dir, state_file_name)
            self.set("url", stated_source)
Example #7
    def getPageUrls(self):
        doc = self.getDomObject()

        image_nodes = doc.cssselect("img.fullsizable")

        page_urls = []
        # All pages are in one page - encode them and stuff them in a bogus query string
        i = 1
        for img in image_nodes:
            imgurl = img.attrib['src']
            feedback.debug(imgurl)
            pagenum = i
            i += 1

            if re.match(".+/nextchap\\.png", imgurl):
                continue

            page_urls.append(
                "%s?u=%s&n=%s" %
                (self.url, base64.urlsafe_b64encode(
                    imgurl.encode("utf-8")).decode("utf-8"), pagenum))

        return page_urls
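The consumer of that bogus query string is not shown in these examples; a hypothetical decoder (decodePageUrl is an invented name) could unpack it like this:

import base64
import urllib.parse

def decodePageUrl(page_url):
    # Recover the image URL and page number packed into the query string above.
    params = urllib.parse.parse_qs(urllib.parse.urlparse(page_url).query)
    imgurl = base64.urlsafe_b64decode(params["u"][0]).decode("utf-8")
    return imgurl, int(params["n"][0])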
Example #8
    def getPageUrls(self):
        document = self.getDomObject()
        child_nodes = document.get_element_by_id("vungdoc").getchildren()

        page_urls = []
        # All pages are in one page - encode them and stuff them in a bogus query string
        i = 1 # counter... hopefully pages always come in-order...!
        for node in child_nodes:
            if node.tag != 'img':
                continue
            elif 'src' not in node.attrib:
                continue

            imgurl = node.attrib['src']
            feedback.debug(imgurl)
            pagenum = i  # util.regexGroup(".+?([0-9]+)\\.[a-z]+$", imgurl)
            i += 1

            if re.match(".+/nextchap\\.png", imgurl):
                # A "next chapter" placeholder means this is not a real chapter page
                return None

            page_urls.append(
                "%s?u=%s&n=%s" %
                (self.url, base64.urlsafe_b64encode(
                    imgurl.encode("utf-8")).decode("utf-8"), pagenum))

        return page_urls
Example #9
import filesys
import feedback

modules_dir = filesys.getParentDir(__file__)

feedback.debug("Modules from %s" % modules_dir)

module_files = filesys.listDir(modules_dir, "[a-zA-Z0-9]+\\.py$")

feedback.debug("Got files %s"%module_files)

engine_files = []
module_names = []

for file_name in module_files:
    if file_name in ("example_module.py", "moduleslist.py"):
        continue

    module_name = file_name[:-3]  # strip the ".py" extension

    engine_files.append("modules.%s" % module_name)
    module_names.append(module_name)

# To be done:
# https://www.manga.club
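The lists built above are presumably consumed by an importer elsewhere; a hypothetical sketch of that step:

import importlib

# Load each discovered engine module by its dotted path, e.g. "modules.readms".
engines = [importlib.import_module(name) for name in engine_files]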
Example #10
def downloadChapter(cengine, chapter_url, comic_dir):
    """ Kicks off the page downloads for a chapter

    Checks whether chapter number is within specified bounds
    
    On completion, if there were no page download errors, attempts CBZ creation

    Returns a list of failed page URLs (empty on success), 0 when the chapter
    is skipped, or 'max' when past the requested end chapter
    """
    feedback.debug("Start on %s ..."%chapter_url)

    global step_delay
    global ch_start
    global ch_end

    chapter     = cengine.Chapter(chapter_url)
    chapter_num = float(chapter.getChapterNumber())
    
    if chapter_num < ch_start:
        return 0
    elif chapter_num > ch_end:
        return 'max'
    
    # IF no start was specified THEN use the last success as base
    if ch_start == -1 and chapter_num <= dlstate.get("last"):
        return 0

    feedback.info("  Get %s"%chapter_url)

    page_urls   = chapter.getPageUrls()
    if page_urls == None:
        return ['%s not a valid chapter'%chapter_num]

    chapter_dir = os.path.sep.join([comic_dir, chapter.getChapterLowerName()])

    feedback.info("    %i pages"%len(page_urls))

    failed_urls = []
    for url in page_urls:
        try:
            downloadPage(cengine, url, chapter_dir)
        except ComicEngine.ComicError as e:
            feedback.warn("Oops: %s" % str(e))
            failed_urls.append(url)
        except urllib.error.URLError as e:
            feedback.warn("Could not download %s" % url)
            failed_urls.append(url)
        except web.DownloadError as e:
            feedback.warn("%i: %s" % (e.code, str(e)))
            failed_urls.append(url)

        time.sleep(step_delay)

    if not failed_urls:
        feedback.debug("  Compiling to CBZ ...")
        try:
            cbz.CBZArchive(chapter_dir).compile(remove_dir=True)
            dlstate.set("last", chapter_num)  # unequivocal success!
        except Exception as e:
            feedback.warn(str(e))
            failed_urls.append(chapter_url)  # surface the CBZ failure to the caller

    return failed_urls
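For orientation, a hypothetical sketch of the downloadComic caller that main() in Example #4 implies (only the name and the return contract come from the source; the body is an assumption):

def downloadComic(cengine, comic_url, args):
    failed = []
    comic = cengine.Comic(comic_url)
    comic_dir = comic.getComicLowerName()
    for chapter_url in comic.getChapterUrls():
        result = downloadChapter(cengine, chapter_url, comic_dir)
        if result == 'max':
            break      # past the requested end chapter
        if result:     # a non-empty list of failures (0 means skipped)
            failed.extend(result)
    return failed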
Example #11
    def getChapterNumber(self):
        cnum = util.regexGroup("https://readms.net/r/[^/]+/([0-9.]+)", self.url)
        feedback.debug("Return chapter number: %s" % cnum)
        return cnum
Example #12
    def getComicLowerName(self):
        res = util.regexGroup("https://readms.net/manga/([^/]+)", self.url)
        feedback.debug("Comic name: " + res)
        return res
Example #13
    def __init__(self, url):
        ComicSite.__init__(self, re.sub("/r/([^/]+).*", "/manga/\\1", url))
        feedback.debug(self.url)
        self.name = self.getComicLowerName()
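The re.sub above rewrites a chapter URL into its series URL; with a hypothetical series name:

# "https://readms.net/r/one_piece/900" -> "https://readms.net/manga/one_piece"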