Exemplo n.º 1
0
    def _copyChunks(self):
        """Merge all chunk files into the first one and move the result in place.

        Chunks 1..n-1 are appended to chunk 0, each written right behind the end
        offset of the previous chunk's range, and removed once copied. Afterwards
        the merged file is moved to ``self.filename`` (re-based on ``self.name``
        when that is set) and the info file is removed.

        :raises Exception: when the merged data ends before the end offset of a
            chunk's range; the merged file and the info file are removed first
        """
        init = fs_encode(self.info.getChunkName(0))  #initial chunk name
        count = self.info.getCount()

        if count > 1:
            incomplete = False
            fo = open(init, "rb+")  #first chunkfile
            try:
                for i in range(1, count):
                    #seek to beginning of chunk, to get rid of overlapping chunks
                    fo.seek(self.info.getChunkRange(i - 1)[1] + 1)
                    fname = fs_encode("%s.chunk%d" % (self.filename, i))
                    fi = open(fname, "rb")  #input file
                    try:
                        buf = 32 * 1024
                        while True:  #copy in chunks, consumes less memory
                            data = fi.read(buf)
                            if not data:
                                break
                            fo.write(data)
                    finally:
                        fi.close()  #do not leak the handle on I/O errors
                    if fo.tell() < self.info.getChunkRange(i)[1]:
                        incomplete = True
                        break
                    remove(fname)  #remove chunk
            finally:
                fo.close()

            if incomplete:
                #the merged file is closed at this point and can be deleted
                remove(init)
                self.info.remove()  #there are probably invalid chunks
                raise Exception(
                    "Downloaded content was smaller than expected. Try to reduce download connections."
                )

        if self.name:
            self.filename = save_join(dirname(self.filename), self.name)

        move(init, fs_encode(self.filename))
        self.info.remove()  #remove info file
Exemplo n.º 2
0
    def _copyChunks(self):
        """Merge all chunk files into the first one and move the result in place.

        Chunks 1..n-1 are appended to chunk 0, each written right behind the end
        offset of the previous chunk's range, and removed once copied. Afterwards
        the merged file is moved to ``self.filename`` (re-based on ``self.name``
        when that is set) and the info file is removed.

        :raises Exception: when the merged data ends before the end offset of a
            chunk's range; the merged file and the info file are removed first
        """
        init = fs_encode(self.info.getChunkName(0)) #initial chunk name
        count = self.info.getCount()

        if count > 1:
            incomplete = False
            fo = open(init, "rb+") #first chunkfile
            try:
                for i in range(1, count):
                    #seek to beginning of chunk, to get rid of overlapping chunks
                    fo.seek(self.info.getChunkRange(i - 1)[1] + 1)
                    fname = fs_encode("%s.chunk%d" % (self.filename, i))
                    fi = open(fname, "rb") #input file
                    try:
                        buf = 32 * 1024
                        while True: #copy in chunks, consumes less memory
                            data = fi.read(buf)
                            if not data:
                                break
                            fo.write(data)
                    finally:
                        fi.close() #do not leak the handle on I/O errors
                    if fo.tell() < self.info.getChunkRange(i)[1]:
                        incomplete = True
                        break
                    remove(fname) #remove chunk
            finally:
                fo.close()

            if incomplete:
                #the merged file is closed at this point and can be deleted
                remove(init)
                self.info.remove() #there are probably invalid chunks
                raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")

        if self.name:
            self.filename = save_join(dirname(self.filename), self.name)

        move(init, fs_encode(self.filename))
        self.info.remove() #remove info file
Exemplo n.º 3
0
    def extract(self, progress, password=None):
        """Run the extraction through ``self.call_unrar`` and evaluate its result.

        Progress is only reported as 0 before and 100 after the process has
        finished, because the whole output is collected with ``communicate()``.

        :param progress: callable taking the progress value
        :param password: password handed to ``call_unrar``, may be None
        :raises CRCError: stderr contains "CRC failed", no password was given and
            the archive is not marked as password protected
        :raises WrongPassword: stderr contains "CRC failed" in any other case
        :raises ArchiveError: anything else on stderr, or a non-zero return code
        """
        if self.fullpath:
            mode = "x"
        else:
            mode = "e"

        proc = self.call_unrar(mode, fs_encode(self.file), self.out, password=password)
        renice(proc.pid, self.renice)

        progress(0)
        out, err = proc.communicate()  # blocks until the process is done
        progress(100)

        if "CRC failed" in err:
            if password or self.passwordProtected:
                raise WrongPassword
            raise CRCError

        message = err.strip()
        if message:  # anything left on stderr counts as an error
            raise ArchiveError(message)
        if proc.returncode:
            raise ArchiveError("Process terminated")

        if self.files:
            return

        # file list not known yet: remember the password and read the content
        self.password = password
        self.listContent()
Exemplo n.º 4
0
    def load(name):
        """Rebuild a ChunkInfo from the ``<name>.chunks`` info file.

        The file starts with a ``name:`` and a ``size:`` line, followed by one
        three-line entry per chunk (index line, ``name:`` line, ``range:`` line;
        the first character of the latter two is dropped before parsing).

        :param name: base name, the info file is ``"%s.chunks" % name``
        :return: ChunkInfo with ``loaded`` set to True, its size and all chunks
        :raises IOError: if the info file does not exist
        :raises WrongFormat: if the header or a chunk entry is malformed
        """
        fs_name = fs_encode("%s.chunks" % name)
        if not exists(fs_name):
            raise IOError()
        fh = codecs.open(fs_name, "r", "utf_8")
        try:
            name = fh.readline()[:-1]
            size = fh.readline()[:-1]
            if not (name.startswith("name:") and size.startswith("size:")):
                raise WrongFormat()
            ci = ChunkInfo(name[5:])
            ci.loaded = True
            ci.setSize(size[5:])
            while True:
                if not fh.readline(): #skip index line, stop at end of file
                    break
                chunk_name = fh.readline()[1:-1]
                chunk_range = fh.readline()[1:-1]
                if not (chunk_name.startswith("name:") and chunk_range.startswith("range:")):
                    raise WrongFormat()
                bounds = chunk_range[6:].split("-")

                ci.addChunk(chunk_name[5:], (long(bounds[0]), long(bounds[1])))
        finally:
            #close the handle on every path, including malformed chunk entries
            fh.close()
        return ci
Exemplo n.º 5
0
    def checkArchive(self):
        """Check whether the archive needs a password.

        :return: True if the listing reports a wrong password (then the header
            is flagged as protected too) or any listed entry is marked with "*";
            False otherwise
        :raises ArchiveError: if no protection was found and ``self.files`` is
            empty after ``listContent()``
        """
        proc = self.call_unrar("l", "-v", fs_encode(self.file))
        out, err = proc.communicate()

        if self.re_wrongpwd.search(err):
            self.passwordProtected = True
            self.headerProtected = True
            return True

        # stdout is only inspected for entries flagged with a leading "*"
        if self.re_version.search(out):
            flagged = any(attr.startswith("*")
                          for attr, size, name in self.re_filelist5.findall(out))
        else:
            flagged = any(name.startswith("*")
                          for name, size, packed in self.re_filelist.findall(out))

        if flagged:
            self.passwordProtected = True
            return True

        self.listContent()
        if self.files:
            return False

        raise ArchiveError("Empty Archive")
Exemplo n.º 6
0
    def checkArchive(self):
        """Check whether the archive needs a password.

        :return: True if the listing reports a wrong password (then the header
            is flagged as protected too) or any listed entry is marked with "*";
            False otherwise
        :raises ArchiveError: if no protection was found and ``self.files`` is
            empty after ``listContent()``
        """
        proc = self.call_unrar("l", "-v", fs_encode(self.file))
        out, err = proc.communicate()

        if self.re_wrongpwd.search(err):
            self.passwordProtected = True
            self.headerProtected = True
            return True

        # stdout is only inspected for entries flagged with a leading "*"
        if self.re_version.search(out):
            flagged = any(attr.startswith("*")
                          for attr, size, name in self.re_filelist5.findall(out))
        else:
            flagged = any(name.startswith("*")
                          for name, size, packed in self.re_filelist.findall(out))

        if flagged:
            self.passwordProtected = True
            return True

        self.listContent()
        if self.files:
            return False

        raise ArchiveError("Empty Archive")
Exemplo n.º 7
0
    def load(name):
        """Rebuild a ChunkInfo from the ``<name>.chunks`` info file.

        The file starts with a ``name:`` and a ``size:`` line, followed by one
        three-line entry per chunk (index line, ``name:`` line, ``range:`` line;
        the first character of the latter two is dropped before parsing).

        :param name: base name, the info file is ``"%s.chunks" % name``
        :return: ChunkInfo with ``loaded`` set to True, its size and all chunks
        :raises IOError: if the info file does not exist
        :raises WrongFormat: if the header or a chunk entry is malformed
        """
        fs_name = fs_encode("%s.chunks" % name)
        if not exists(fs_name):
            raise IOError()
        fh = codecs.open(fs_name, "r", "utf_8")
        try:
            name = fh.readline()[:-1]
            size = fh.readline()[:-1]
            if not (name.startswith("name:") and size.startswith("size:")):
                raise WrongFormat()
            ci = ChunkInfo(name[5:])
            ci.loaded = True
            ci.setSize(size[5:])
            while True:
                if not fh.readline():  #skip index line, stop at end of file
                    break
                chunk_name = fh.readline()[1:-1]
                chunk_range = fh.readline()[1:-1]
                if not (chunk_name.startswith("name:") and chunk_range.startswith("range:")):
                    raise WrongFormat()
                bounds = chunk_range[6:].split("-")

                ci.addChunk(chunk_name[5:], (long(bounds[0]), long(bounds[1])))
        finally:
            #close the handle on every path, including malformed chunk entries
            fh.close()
        return ci
Exemplo n.º 8
0
    def checkPassword(self, password):
        """Tell whether ``password`` is acceptable for this archive.

        Only header protected archives can be verified at this point; for all
        others True is returned without running anything.

        :param password: password to try
        :return: False if the listing reports a wrong password, True otherwise
        """
        if not self.headerProtected:
            return True

        proc = self.call_unrar("l", "-v", fs_encode(self.file), password=password)
        err = proc.communicate()[1]
        return not self.re_wrongpwd.search(err)
Exemplo n.º 9
0
    def checkPassword(self, password):
        """Tell whether ``password`` is acceptable for this archive.

        Only header protected archives can be verified at this point; for all
        others True is returned without running anything.

        :param password: password to try
        :return: False if the listing reports a wrong password, True otherwise
        """
        if not self.headerProtected:
            return True

        proc = self.call_unrar("l", "-v", fs_encode(self.file), password=password)
        err = proc.communicate()[1]
        return not self.re_wrongpwd.search(err)
Exemplo n.º 10
0
 def save(self):
     """Write name, size and all chunk entries to the ``<name>.chunks`` file.

     The file is written as utf-8: a ``name:`` and a ``size:`` line followed,
     per chunk, by an index line ``#<i>:`` and tab-indented ``name:`` and
     ``range:<start>-<end>`` lines.
     """
     fs_name = fs_encode("%s.chunks" % self.name)
     fh = codecs.open(fs_name, "w", "utf_8")
     try:
         fh.write("name:%s\n" % self.name)
         fh.write("size:%s\n" % self.size)
         for i, c in enumerate(self.chunks):
             fh.write("#%d:\n" % i)
             fh.write("\tname:%s\n" % c[0])
             fh.write("\trange:%i-%i\n" % c[1])
     finally:
         #close the handle even if one of the writes fails
         fh.close()
Exemplo n.º 11
0
 def save(self):
     """Write name, size and all chunk entries to the ``<name>.chunks`` file.

     The file is written as utf-8: a ``name:`` and a ``size:`` line followed,
     per chunk, by an index line ``#<i>:`` and tab-indented ``name:`` and
     ``range:<start>-<end>`` lines.
     """
     fs_name = fs_encode("%s.chunks" % self.name)
     fh = codecs.open(fs_name, "w", "utf_8")
     try:
         fh.write("name:%s\n" % self.name)
         fh.write("size:%s\n" % self.size)
         for i, c in enumerate(self.chunks):
             fh.write("#%d:\n" % i)
             fh.write("\tname:%s\n" % c[0])
             fh.write("\trange:%i-%i\n" % c[1])
     finally:
         #close the handle even if one of the writes fails
         fh.close()
Exemplo n.º 12
0
    def getHandle(self):
        """ returns a Curl handle ready to use for perform/multiperform

        Sets the request context and callbacks on ``self.c``, opens the chunk
        file as ``self.fp`` (append mode on resume, truncating otherwise) and
        configures the byte range to request.

        :return: ``self.c``, or None when resuming a ranged chunk whose already
            arrived bytes reach the end of its range
        """
        self.setRequestContext(self.p.url, self.p.get, self.p.post,
                               self.p.referer, self.p.cj)
        self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)

        chunk_path = fs_encode(self.p.info.getChunkName(self.id))

        if not self.resume:
            if self.range:
                if self.id == len(self.p.info.chunks) - 1:
                    # last chunk: leave the end open, so we get everything
                    span = "%i-" % self.range[0]
                else:
                    # end is one past the chunk's range end, capped at size - 1
                    span = "%i-%i" % (self.range[0],
                                      min(self.range[1] + 1, self.p.size - 1))

                self.log.debug("Chunked with range %s" % span)
                self.c.setopt(pycurl.RANGE, span)

            self.fp = open(chunk_path, "wb")
            return self.c

        self.fp = open(chunk_path, "ab")
        self.arrived = self.fp.tell()
        if not self.arrived:
            # tell() gave 0: fall back to the size of the file on disk
            self.arrived = stat(chunk_path).st_size

        if not self.range:
            self.log.debug("Resume File from %i" % self.arrived)
            self.c.setopt(pycurl.RESUME_FROM, self.arrived)
            return self.c

        offset = self.arrived + self.range[0]
        # do nothing if chunk already finished
        if offset >= self.range[1]:
            return None

        if self.id == len(self.p.info.chunks) - 1:
            # last chunk: leave the end open, so we get everything
            span = "%i-" % offset
        else:
            span = "%i-%i" % (offset,
                              min(self.range[1] + 1, self.p.size - 1))

        self.log.debug("Chunked resume with range %s" % span)
        self.c.setopt(pycurl.RANGE, span)
        return self.c
Exemplo n.º 13
0
    def listContent(self):
        """Read the archive's file listing and store it in ``self.files``.

        Each stripped output line is joined with ``self.out``; the result is
        kept as a set. Text on stderr is only logged, except for "Cannot open".

        :raises ArchiveError: if stderr contains "Cannot open"
        """
        if self.fullpath:
            command = "vb"
        else:
            command = "lb"

        proc = self.call_unrar(command, "-v", fs_encode(self.file), password=self.password)
        out, err = proc.communicate()

        if "Cannot open" in err:
            raise ArchiveError("Cannot open file")

        problems = err.strip()
        if problems: # only log error at this point
            self.m.logError(problems)

        self.files = set(save_join(self.out, entry.strip())
                         for entry in decode(out).splitlines())
Exemplo n.º 14
0
    def listContent(self):
        """Read the archive's file listing and store it in ``self.files``.

        Each stripped output line is joined with ``self.out``; the result is
        kept as a set. Text on stderr is only logged, except for "Cannot open".

        :raises ArchiveError: if stderr contains "Cannot open"
        """
        if self.fullpath:
            command = "vb"
        else:
            command = "lb"

        proc = self.call_unrar(command, "-v", fs_encode(self.file), password=self.password)
        out, err = proc.communicate()

        if "Cannot open" in err:
            raise ArchiveError("Cannot open file")

        problems = err.strip()
        if problems: # only log error at this point
            self.m.logError(problems)

        self.files = set(save_join(self.out, entry.strip())
                         for entry in decode(out).splitlines())
Exemplo n.º 15
0
    def getHandle(self):
        """ returns a Curl handle ready to use for perform/multiperform

        Sets the request context and callbacks on ``self.c``, opens the chunk
        file as ``self.fp`` (append mode on resume, truncating otherwise) and
        configures the byte range to request.

        :return: ``self.c``, or None when resuming a ranged chunk whose already
            arrived bytes reach the end of its range
        """
        self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj)
        self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)

        chunk_path = fs_encode(self.p.info.getChunkName(self.id))

        if not self.resume:
            if self.range:
                if self.id == len(self.p.info.chunks) - 1:
                    # last chunk: leave the end open, so we get everything
                    span = "%i-" % self.range[0]
                else:
                    # end is one past the chunk's range end, capped at size - 1
                    span = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1))

                self.log.debug("Chunked with range %s" % span)
                self.c.setopt(pycurl.RANGE, span)

            self.fp = open(chunk_path, "wb")
            return self.c

        self.fp = open(chunk_path, "ab")
        self.arrived = self.fp.tell()
        if not self.arrived:
            # tell() gave 0: fall back to the size of the file on disk
            self.arrived = stat(chunk_path).st_size

        if not self.range:
            self.log.debug("Resume File from %i" % self.arrived)
            self.c.setopt(pycurl.RESUME_FROM, self.arrived)
            return self.c

        offset = self.arrived + self.range[0]
        # do nothing if chunk already finished
        if offset >= self.range[1]:
            return None

        if self.id == len(self.p.info.chunks) - 1:
            # last chunk: leave the end open, so we get everything
            span = "%i-" % offset
        else:
            span = "%i-%i" % (offset, min(self.range[1] + 1, self.p.size - 1))

        self.log.debug("Chunked resume with range %s" % span)
        self.c.setopt(pycurl.RANGE, span)
        return self.c
Exemplo n.º 16
0
    def extract(self, progress, password=None):
        """Run the extraction and report its progress while it is running.

        stdout of the process is read byte by byte; every run of digits that is
        directly followed by "%" is passed to ``progress``.

        :param progress: callable taking the progress value
        :param password: password handed to ``call_unrar``, may be None
        :raises CRCError: stderr contains "CRC failed", no password was given and
            the archive is not marked as password protected
        :raises WrongPassword: stderr contains "CRC failed" in any other case
        :raises ArchiveError: anything else on stderr, or a non-zero return code
        """
        command = "x" if self.fullpath else "e"

        p = self.call_unrar(command,
                            fs_encode(self.file),
                            self.out,
                            password=password)
        renice(p.pid, self.renice)

        progress(0)
        progressstring = ""
        while True:
            c = p.stdout.read(1)
            # quit loop on eof
            if not c:
                break
            # reading a percentage sign -> set progress and restart
            if c == '%':
                # a "%" without digits in front of it (e.g. inside a file name)
                # carries no value; int("") would raise ValueError
                if progressstring:
                    progress(int(progressstring))
                progressstring = ""
            # not reading a digit -> therefore restart
            elif c not in digits:
                progressstring = ""
            # add digit to progressstring
            else:
                progressstring = progressstring + c
        progress(100)

        # retrieve stderr
        err = p.stderr.read()
        # both pipes hit eof; reap the process so that returncode gets set
        # NOTE(review): assumes call_unrar returns a subprocess.Popen - confirm
        p.wait()

        if "CRC failed" in err and not password and not self.passwordProtected:
            raise CRCError
        elif "CRC failed" in err:
            raise WrongPassword
        if err.strip():  #: raise error if anything is on stderr
            raise ArchiveError(err.strip())
        if p.returncode:
            raise ArchiveError("Process terminated")

        if not self.files:
            self.password = password
            self.listContent()
Exemplo n.º 17
0
    def checkDownload(self,
                      rules,
                      api_size=0,
                      max_size=50000,
                      delete=True,
                      read_size=0):
        """ checks the content of the last downloaded file, re match is saved to `lastCheck`

        :param rules: dict with names and rules to match (compiled regexp or strings)
        :param api_size: expected file size; no check is done if the file has at least this size
        :param max_size: if the file is larger than this it won't be checked (unless `read_size` is set)
        :param delete: delete the file if a rule matched
        :param read_size: amount of bytes to read from the file; 0 reads the whole file
        :return: dictionary key of the first rule that matched, None if nothing
                 matched or the check was skipped
        """
        lastDownload = fs_encode(self.lastDownload)
        if not exists(lastDownload): return None

        size = stat(lastDownload).st_size

        if api_size and api_size <= size: return None
        elif size > max_size and not read_size: return None
        self.log.debug("Download Check triggered")
        f = open(lastDownload, "rb")
        try:
            content = f.read(read_size if read_size else -1)
        finally:
            #close the handle even if reading fails
            f.close()
        #produces encoding errors, better log to other file in the future?
        #self.log.debug("Content: %s" % content)
        for name, rule in rules.iteritems():
            #isinstance also accepts subclasses of str/unicode as plain text rules
            if isinstance(rule, (str, unicode)):
                if rule in content:
                    if delete:
                        remove(lastDownload)
                    return name
            elif hasattr(rule, "search"):
                m = rule.search(content)
                if m:
                    if delete:
                        remove(lastDownload)
                    self.lastCheck = m
                    return name
Exemplo n.º 18
0
    def extract(self, progress, password=None):
        """Run the extraction and report its progress while it is running.

        stdout of the process is read byte by byte; every run of digits that is
        directly followed by "%" is passed to ``progress``.

        :param progress: callable taking the progress value
        :param password: password handed to ``call_unrar``, may be None
        :raises CRCError: stderr contains "CRC failed", no password was given and
            the archive is not marked as password protected
        :raises WrongPassword: stderr contains "CRC failed" in any other case
        :raises ArchiveError: anything else on stderr, or a non-zero return code
        """
        command = "x" if self.fullpath else "e"

        p = self.call_unrar(command, fs_encode(self.file), self.out, password=password)
        renice(p.pid, self.renice)

        progress(0)
        progressstring = ""
        while True:
            c = p.stdout.read(1)
            # quit loop on eof
            if not c:
                break
            # reading a percentage sign -> set progress and restart
            if c == '%':
                # a "%" without digits in front of it (e.g. inside a file name)
                # carries no value; int("") would raise ValueError
                if progressstring:
                    progress(int(progressstring))
                progressstring = ""
            # not reading a digit -> therefore restart
            elif c not in digits:
                progressstring = ""
            # add digit to progressstring
            else:
                progressstring = progressstring + c
        progress(100)

        # retrieve stderr
        err = p.stderr.read()
        # both pipes hit eof; reap the process so that returncode gets set
        # NOTE(review): assumes call_unrar returns a subprocess.Popen - confirm
        p.wait()

        if "CRC failed" in err and not password and not self.passwordProtected:
            raise CRCError
        elif "CRC failed" in err:
            raise WrongPassword
        if err.strip(): #raise error if anything is on stderr
            raise ArchiveError(err.strip())
        if p.returncode:
            raise ArchiveError("Process terminated")

        if not self.files:
            self.password = password
            self.listContent()
Exemplo n.º 19
0
    def checkDownload(self, rules, api_size=0, max_size=50000, delete=True, read_size=0):
        """ checks the content of the last downloaded file, re match is saved to `lastCheck`

        :param rules: dict with names and rules to match (compiled regexp or strings)
        :param api_size: expected file size; no check is done if the file has at least this size
        :param max_size: if the file is larger than this it won't be checked (unless `read_size` is set)
        :param delete: delete the file if a rule matched
        :param read_size: amount of bytes to read from the file; 0 reads the whole file
        :return: dictionary key of the first rule that matched, None if nothing
                 matched or the check was skipped
        """
        lastDownload = fs_encode(self.lastDownload)
        if not exists(lastDownload): return None

        size = stat(lastDownload).st_size

        if api_size and api_size <= size: return None
        elif size > max_size and not read_size: return None
        self.log.debug("Download Check triggered")
        f = open(lastDownload, "rb")
        try:
            content = f.read(read_size if read_size else -1)
        finally:
            #close the handle even if reading fails
            f.close()
        #produces encoding errors, better log to other file in the future?
        #self.log.debug("Content: %s" % content)
        for name, rule in rules.iteritems():
            #isinstance also accepts subclasses of str/unicode as plain text rules
            if isinstance(rule, (str, unicode)):
                if rule in content:
                    if delete:
                        remove(lastDownload)
                    return name
            elif hasattr(rule, "search"):
                m = rule.search(content)
                if m:
                    if delete:
                        remove(lastDownload)
                    self.lastCheck = m
                    return name
Exemplo n.º 20
0
    def getLocalContent(self, urls):
        """Load files from disk and separate to file content and url list

        :param urls:
        :return: list of (filename, content), remote urls
        """
        content = []
        # do nothing if no decryptFile method
        if hasattr(self.__class__, "decryptFile"):
            remote = []
            for url in urls:
                path = None
                if url.startswith("http"):  # skip urls directly
                    pass
                elif url.startswith(self.CONTENT_PREFIX):
                    path = url
                elif exists(url):
                    path = url
                elif exists(self.core.path(url)):
                    path = self.core.path(url)

                if path:
                    try:
                        if path.startswith(self.CONTENT_PREFIX):
                            content.append(
                                ("", path[len(self.CONTENT_PREFIX)]))
                        else:
                            f = open(fs_encode(path), "rb")
                            content.append((f.name, f.read()))
                            f.close()
                    except IOError, e:
                        self.logError("IOError", e)
                else:
                    remote.append(url)

            #swap filtered url list
            urls = remote
Exemplo n.º 21
0
    def extract(self, progress, password=None):
        """Run the extraction through ``self.call_unrar`` and evaluate its result.

        Progress is only reported as 0 before and 100 after the process has
        finished, because the whole output is collected with ``communicate()``.

        :param progress: callable taking the progress value
        :param password: password handed to ``call_unrar``, may be None
        :raises CRCError: stderr contains "CRC failed", no password was given and
            the archive is not marked as password protected
        :raises WrongPassword: stderr contains "CRC failed" in any other case
        :raises ArchiveError: anything else on stderr, or a non-zero return code
        """
        if self.fullpath:
            mode = "x"
        else:
            mode = "e"

        proc = self.call_unrar(mode, fs_encode(self.file), self.out, password=password)
        renice(proc.pid, self.renice)

        progress(0)
        out, err = proc.communicate() # blocks until the process is done
        progress(100)

        if "CRC failed" in err:
            if password or self.passwordProtected:
                raise WrongPassword
            raise CRCError

        message = err.strip()
        if message: # anything left on stderr counts as an error
            raise ArchiveError(message)
        if proc.returncode:
            raise ArchiveError("Process terminated")

        if self.files:
            return

        # file list not known yet: remember the password and read the content
        self.password = password
        self.listContent()
Exemplo n.º 22
0
    def getLocalContent(self, urls):
        """Load files from disk and separate to file content and url list

        :param urls:
        :return: list of (filename, content), remote urls
        """
        content = []
        # do nothing if no decryptFile method
        if hasattr(self.__class__, "decryptFile"):
            remote = []
            for url in urls:
                path = None
                if url.startswith("http"): # skip urls directly
                    pass
                elif url.startswith(self.CONTENT_PREFIX):
                    path = url
                elif exists(url):
                    path = url
                elif exists(self.core.path(url)):
                    path = self.core.path(url)

                if path:
                    try:
                        if path.startswith(self.CONTENT_PREFIX):
                            content.append(("", path[len(self.CONTENT_PREFIX)]))
                        else:
                            f = open(fs_encode(path), "rb")
                            content.append((f.name, f.read()))
                            f.close()
                    except IOError, e:
                        self.logError("IOError", e)
                else:
                    remote.append(url)

            #swap filtered url list
            urls = remote
Exemplo n.º 23
0
    def _download(self, chunks, resume):
        """Drive the chunked download through ``self.m``.

        Starts with a single initial chunk (id 0). As soon as ``self.chunkSupport``
        and ``self.size`` are set, the remaining chunks are created and their curl
        handles added to ``self.m``. Finished and failed handles are then polled
        at most every 0.5 seconds. If chunks failed while the initial chunk is
        neither failed nor done, all other chunks are dropped and the initial one
        loads the rest; any other failure re-raises the last exception.

        NOTE(review): ``self.m`` is presumably a pycurl multi handle - confirm.
        NOTE(review): this excerpt ends inside the polling loop; neither an exit
        of the outer loop nor a read of ``done`` or ``lastTimeCheck`` is visible.

        :param chunks: number of chunks handed to ``self.info.createChunks`` when
            not resuming; replaced by ``self.info.getCount()`` once chunks exist
        :param resume: passed to every HTTPChunk; when false the chunk info is
            reset to a single initial entry first
        """
        if not resume:
            self.info.clear()
            self.info.addChunk("%s.chunk0" % self.filename,
                               (0, 0))  #create an initial entry

        self.chunks = []

        init = HTTPChunk(
            0, self, None,
            resume)  #initial chunk that will load complete file (if needed)

        self.chunks.append(init)
        self.m.add_handle(init.getHandle())

        lastFinishCheck = 0
        lastTimeCheck = 0
        chunksDone = set()  # set of curl handles that are finished
        chunksCreated = False
        done = False
        if self.info.getCount(
        ) > 1:  # This is a resume, if we were chunked originally assume still can
            self.chunkSupport = True

        while 1:
            #need to create chunks
            if not chunksCreated and self.chunkSupport and self.size:  #will be set later by first chunk

                if not resume:
                    self.info.setSize(self.size)
                    self.info.createChunks(chunks)
                    self.info.save()

                chunks = self.info.getCount()

                init.setRange(self.info.getChunkRange(0))

                for i in range(1, chunks):
                    c = HTTPChunk(i, self, self.info.getChunkRange(i), resume)

                    # a falsy handle means there is nothing to request for this chunk
                    handle = c.getHandle()
                    if handle:
                        self.chunks.append(c)
                        self.m.add_handle(handle)
                    else:
                        #close immediately
                        self.log.debug("Invalid curl handle -> closed")
                        c.close()

                chunksCreated = True

            # call perform() again for as long as it asks to be called again
            while 1:
                ret, num_handles = self.m.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

            t = time()

            # reduce these calls
            # when num_q is 0, the loop is exited
            while lastFinishCheck + 0.5 < t:
                # list of failed chunks
                failed = []
                ex = None  # save only last exception, we can only raise one anyway

                num_q, ok_list, err_list = self.m.info_read()
                for c in ok_list:
                    chunk = self.findChunk(c)
                    try:  # check if the header implies success, else add it to failed list
                        chunk.verifyHeader()
                    except BadHeader, e:
                        self.log.debug("Chunk %d failed: %s" %
                                       (chunk.id + 1, str(e)))
                        failed.append(chunk)
                        ex = e
                    else:
                        chunksDone.add(c)

                for c in err_list:
                    curl, errno, msg = c
                    chunk = self.findChunk(curl)
                    #test if chunk was finished
                    # NOTE(review): errno 23 is presumably curl's write error,
                    # raised when the write callback stops the transfer - confirm
                    if errno != 23 or "0 !=" not in msg:
                        failed.append(chunk)
                        ex = pycurl.error(errno, msg)
                        self.log.debug("Chunk %d failed: %s" %
                                       (chunk.id + 1, str(ex)))
                        continue

                    try:  # check if the header implies success, else add it to failed list
                        chunk.verifyHeader()
                    except BadHeader, e:
                        self.log.debug("Chunk %d failed: %s" %
                                       (chunk.id + 1, str(e)))
                        failed.append(chunk)
                        ex = e
                    else:
                        chunksDone.add(curl)
                if not num_q:  # no more info to get

                    # check if init is not finished so we reset download connections
                    # note that other chunks are closed and everything downloaded with initial connection
                    if failed and init not in failed and init.c not in chunksDone:
                        self.log.error(
                            _("Download chunks failed, fallback to single connection | %s"
                              % (str(ex))))

                        #list of chunks to clean and remove
                        to_clean = filter(lambda x: x is not init, self.chunks)
                        for chunk in to_clean:
                            self.closeChunk(chunk)
                            self.chunks.remove(chunk)
                            remove(fs_encode(self.info.getChunkName(chunk.id)))

                        #let first chunk load the rest and update the info file
                        init.resetRange()
                        self.info.clear()
                        self.info.addChunk("%s.chunk0" % self.filename,
                                           (0, self.size))
                        self.info.save()
                    elif failed:
                        raise ex

                    lastFinishCheck = t

                    if len(chunksDone) >= len(self.chunks):
                        if len(chunksDone) > len(self.chunks):
                            self.log.warning(
                                "Finished download chunks size incorrect, please report bug."
                            )
                        done = True  #all chunks loaded

                    break
Exemplo n.º 24
0
 def remove(self):
     """Delete the ``<name>.chunks`` info file if it exists."""
     info_path = fs_encode("%s.chunks" % self.name)
     if not exists(info_path):
         return
     remove(info_path)
Exemplo n.º 25
0
    def extract(self, ids, thread=None):
        """Extract all archives found in the given packages.

        For every package the output folder is determined from the config
        ("destination", "subfolder"), then every plugin is asked for targets
        among the package's files. Extracted files are fed back as new
        candidates when "recursive" is enabled, until no unseen files are left.

        :param ids: package ids to process; ids without a package are skipped
        :param thread: passed through to ``startExtracting``
        """
        # reload from txt file
        self.reloadPasswords()

        # dl folder
        dl = self.config['general']['download_folder']

        extracted = []

        #iterate packages -> plugins -> targets
        for pid in ids:
            p = self.core.files.getPackage(pid)
            # check for a missing package before touching its attributes
            if not p:
                continue
            self.logInfo(_("Check package %s") % p.name)

            # determine output folder
            out = save_join(dl, p.folder, "")
            # force trailing slash

            if self.getConfig("destination") and self.getConfig("destination").lower() != "none":

                out = save_join(dl, p.folder, self.getConfig("destination"), "")
                #relative to package folder if destination is relative, otherwise absolute path overwrites them

                if self.getConfig("subfolder"):
                    out = join(out, fs_encode(p.folder))

                if not exists(out):
                    makedirs(out)

            files_ids = [(save_join(dl, p.folder, x["name"]), x["id"]) for x in p.getChildren().itervalues()]
            matched = False

            # check as long there are unseen files
            while files_ids:
                new_files_ids = []

                for plugin in self.plugins:
                    targets = plugin.getTargets(files_ids)
                    if targets:
                        self.logDebug("Targets for %s: %s" % (plugin.__name__, targets))
                        matched = True
                    for target, fid in targets:
                        if target in extracted:
                            self.logDebug(basename(target), "skipped")
                            continue
                        extracted.append(target)  # prevent extracting same file twice

                        klass = plugin(self, target, out, self.getConfig("fullpath"), self.getConfig("overwrite"),
                                       self.getConfig("renice"))
                        klass.init()

                        self.logInfo(basename(target), _("Extract to %s") % out)
                        new_files = self.startExtracting(klass, fid, p.password.strip().splitlines(), thread)
                        self.logDebug("Extracted: %s" % new_files)
                        self.setPermissions(new_files)

                        for file in new_files:
                            if not exists(file):
                                self.logDebug("new file %s does not exists" % file)
                                continue
                            if self.getConfig("recursive") and isfile(file):
                                new_files_ids.append((file, fid))  # append as new target

                files_ids = new_files_ids  # also check extracted files

            if not matched:
                self.logInfo(_("No files found to extract"))
Exemplo n.º 26
0
class Hoster(Base):
    """
    Base plugin for hoster plugin. Overwrite getInfo for online status retrieval, process for downloading.

    Subclasses are expected to override `process` (mandatory) and may override
    `init` / `setup` for additional initialisation.
    """

    @staticmethod
    def getInfo(urls):
        """This method is used to retrieve the online status of files for hoster plugins.
        It has to *yield* list of tuples with the result in this format (name, size, status, url),
        where status is one of API pyfile statuses.

        :param urls: List of urls
        :return: yield list of tuple with results (name, size, status, url)
        """
        pass

    def __init__(self, pyfile):
        """Set up default download settings, account, request object and state for `pyfile`.

        :param pyfile: the file object this plugin instance will work on
        """
        Base.__init__(self, pyfile.m.core)

        self.wantReconnect = False
        #: enables simultaneous processing of multiple downloads
        self.limitDL = 0
        #: chunk limit
        self.chunkLimit = 1
        #: enables resume (will be ignored if server dont accept chunks)
        self.resumeDownload = False

        #: plugin is waiting
        self.waiting = False

        self.ocr = None  #captcha reader instance
        #: account handler instance, see :py:class:`Account`
        self.account = self.core.accountManager.getAccountForPlugin(
            self.__name__)

        #: premium status
        self.premium = False
        #: username/login
        self.user = None

        # an account that is not usable is treated like no account at all
        if self.account and not self.account.isUsable():
            self.account = None

        if self.account:
            self.user = self.account.loginname
            #: Browser instance, see `network.Browser`
            self.req = self.account.getAccountRequest()
            # Default:  -1, True, True
            (self.chunkLimit, self.limitDL,
             self.resumeDownload) = self.account.getDownloadSettings()
            self.premium = self.account.isPremium()
        else:
            self.req = self.core.requestFactory.getRequest(self.__name__)

        #: associated pyfile instance, see `PyFile`
        self.pyfile = pyfile
        self.thread = None  # holds thread in future

        #: location where the last call to download was saved
        self.lastDownload = ""
        #: re match of the last call to `checkDownload`
        self.lastCheck = None
        #: js engine, see `JsEngine`
        self.js = self.core.js

        self.retries = 0  # amount of retries already made
        self.html = None  # some plugins store html code here

        self.init()

    def getMultiDL(self):
        """Return True when no positive download limit is set (`limitDL <= 0`)."""
        return self.limitDL <= 0

    def setMultiDL(self, val):
        """Set `limitDL` to 0 for a truthy `val`, otherwise to 1."""
        self.limitDL = 0 if val else 1

    #: virtual attribute using self.limitDL on behind
    multiDL = property(getMultiDL, setMultiDL)

    def getChunkCount(self):
        """Return the number of chunks to use for a download.

        The configured value is used as is when `chunkLimit` is not positive,
        otherwise it is capped by `chunkLimit`.
        """
        if self.chunkLimit <= 0:
            return self.config["download"]["chunks"]
        return min(self.config["download"]["chunks"], self.chunkLimit)

    def getDownloadLimit(self):
        """Return the download limit, taking the account option ``limitDL`` into account.

        Without an account the plugin's own `limitDL` is returned.
        """
        if self.account:
            limit = self.account.options.get("limitDL", 0)
            if limit == "":
                limit = 0
            if self.limitDL > 0:  # a limit is already set, we use the minimum
                # NOTE(review): an account limit of 0 makes this return 0 although
                # self.limitDL is positive -- confirm how callers interpret 0.
                return min(int(limit), self.limitDL)
            else:
                return int(limit)
        else:
            return self.limitDL

    def __call__(self):
        """Return the plugin name."""
        return self.__name__

    def init(self):
        """initialize the plugin (in addition to `__init__`)"""
        pass

    def setup(self):
        """ setup for environment and other things, called before downloading (possibly more than one time)"""
        pass

    def preprocessing(self, thread):
        """ handles important things to do before starting

        :param thread: thread this plugin runs in, stored as `self.thread`
        :return: whatever `process` returns
        """
        self.thread = thread

        if self.account:
            # will force a re-login or reload of account info if necessary
            self.account.getAccountInfo()
        else:
            self.req.clearCookies()

        self.setup()

        self.pyfile.setStatus("starting")

        return self.process(self.pyfile)

    def process(self, pyfile):
        """the 'main' method of every plugin, you **have to** overwrite it"""
        raise NotImplementedError

    def abort(self):
        """Return the abort flag of the associated pyfile."""
        return self.pyfile.abort

    def resetAccount(self):
        """ don't use account and retry download """
        self.account = None
        self.req = self.core.requestFactory.getRequest(self.__name__)
        self.retry()

    def checksum(self, local_file=None):
        """
        Currently a stub that always returns ``(True, 10)``.

        return codes:
        0  - checksum ok
        1  - checksum wrong
        5  - can't get checksum
        10 - not implemented
        20 - unknown error
        """
        #@TODO checksum check addon

        return True, 10

    def setWait(self, seconds, reconnect=False):
        """Set a specific wait time later used with `wait`

        :param seconds: wait time in seconds
        :param reconnect: True if a reconnect would avoid wait time
        """
        if reconnect:
            self.wantReconnect = True
        self.pyfile.waitUntil = time() + int(seconds)

    def wait(self):
        """ waits the time previously set

        :raises Reconnect: when the reconnecting event of the thread manager is set while waiting
        """
        self.waiting = True
        self.pyfile.setStatus("waiting")

        while self.pyfile.waitUntil > time():
            # wake up at least every 2 seconds to check for abort / reconnect
            self.thread.m.reconnecting.wait(2)

            self.checkAbort()
            if self.thread.m.reconnecting.isSet():
                self.waiting = False
                self.wantReconnect = False
                raise Reconnect

        self.waiting = False
        self.pyfile.setStatus("starting")

    def offline(self):
        """ fail and indicate file is offline """
        raise Fail("offline")

    def tempOffline(self):
        """ fail and indicates file ist temporary offline, the core may take consequences """
        raise Fail("temp. offline")

    def retry(self, max_tries=3, wait_time=1, reason=""):
        """Retries and begin again from the beginning

        :param max_tries: number of maximum retries
        :param wait_time: time to wait in seconds
        :param reason: reason for retrying, will be passed to fail if max_tries reached
        :raises Fail: when `max_tries` is positive and already reached
        :raises Retry: otherwise, after waiting `wait_time` seconds
        """
        if 0 < max_tries <= self.retries:
            if not reason:
                reason = "Max retries reached"
            raise Fail(reason)

        self.wantReconnect = False
        self.setWait(wait_time)
        self.wait()

        self.retries += 1
        raise Retry(reason)

    def _changeOwner(self, path):
        """Best-effort chown of `path` to the configured user and group.

        Does nothing unless the ``change_dl`` permission option is enabled and
        the platform is not Windows. Failures are only logged as a warning.

        :param path: file system path of the file or folder to chown
        """
        if not self.core.config["permission"]["change_dl"] or os.name == "nt":
            return

        try:
            uid = getpwnam(self.config["permission"]["user"])[2]
            gid = getgrnam(self.config["permission"]["group"])[2]

            chown(path, uid, gid)
        except Exception as e:
            self.log.warning(
                _("Setting User and Group failed: %s") % str(e))

    def download(self,
                 url,
                 get=None,
                 post=None,
                 ref=True,
                 cookies=True,
                 disposition=False):
        """Downloads the content at url to download folder

        :param url:
        :param get: optional dict, an empty dict is used when omitted
        :param post: optional dict, an empty dict is used when omitted
        :param ref:
        :param cookies:
        :param disposition: if True and server provides content-disposition header\
        the filename will be changed if needed
        :return: The location where the file was saved
        """
        # use fresh dicts per call instead of shared mutable default arguments
        if get is None:
            get = {}
        if post is None:
            post = {}

        self.checkForSameFiles()
        self.checkAbort()

        self.pyfile.setStatus("downloading")

        download_folder = self.config['general']['download_folder']

        location = save_join(download_folder, self.pyfile.package().folder)

        if not exists(location):
            makedirs(location, int(self.core.config["permission"]["folder"],
                                   8))
            self._changeOwner(location)

        # convert back to unicode
        location = fs_decode(location)
        name = save_filename(self.pyfile.name)

        filename = join(location, name)

        self.core.addonManager.dispatchEvent("download:start", self.pyfile,
                                             url, filename)

        try:
            newname = self.req.httpDownload(url,
                                            filename,
                                            get=get,
                                            post=post,
                                            ref=ref,
                                            cookies=cookies,
                                            chunks=self.getChunkCount(),
                                            resume=self.resumeDownload,
                                            disposition=disposition)
        finally:
            # record the size even when the download raised
            self.pyfile.size = self.req.size

        if disposition and newname and newname != name:  #triple check, just to be sure
            self.log.info("%(name)s saved as %(newname)s" % {
                "name": name,
                "newname": newname
            })
            self.pyfile.name = newname
            filename = join(location, newname)

        fs_filename = fs_encode(filename)

        if self.core.config["permission"]["change_file"]:
            chmod(fs_filename, int(self.core.config["permission"]["file"], 8))

        self._changeOwner(fs_filename)

        # remember and hand back the final location, as promised by the docstring
        self.lastDownload = filename
        return self.lastDownload
Exemplo n.º 27
0
 def remove(self):
     """Delete the ``<name>.chunks`` file of this object if it exists on disk."""
     info_path = fs_encode("%s.chunks" % self.name)
     if not exists(info_path):
         return
     # inside this method `remove` resolves to the module-level function, not to this method
     remove(info_path)
Exemplo n.º 28
0
    def _download(self, chunks, resume):
        if not resume:
            self.info.clear()
            self.info.addChunk("%s.chunk0" % self.filename, (0, 0)) #create an initial entry

        self.chunks = []

        init = HTTPChunk(0, self, None, resume) #initial chunk that will load complete file (if needed)

        self.chunks.append(init)
        self.m.add_handle(init.getHandle())

        lastFinishCheck = 0
        lastTimeCheck = 0
        chunksDone = set()  # list of curl handles that are finished
        chunksCreated = False
        done = False
        if self.info.getCount() > 1: # This is a resume, if we were chunked originally assume still can
            self.chunkSupport = True

        while 1:
            #need to create chunks
            if not chunksCreated and self.chunkSupport and self.size: #will be set later by first chunk

                if not resume:
                    self.info.setSize(self.size)
                    self.info.createChunks(chunks)
                    self.info.save()

                chunks = self.info.getCount()

                init.setRange(self.info.getChunkRange(0))

                for i in range(1, chunks):
                    c = HTTPChunk(i, self, self.info.getChunkRange(i), resume)

                    handle = c.getHandle()
                    if handle:
                        self.chunks.append(c)
                        self.m.add_handle(handle)
                    else:
                        #close immediately
                        self.log.debug("Invalid curl handle -> closed")
                        c.close()

                chunksCreated = True

            while 1:
                ret, num_handles = self.m.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

            t = time()

            # reduce these calls
            # when num_q is 0, the loop is exited
            while lastFinishCheck + 0.5 < t:
                # list of failed curl handles
                failed = []
                ex = None # save only last exception, we can only raise one anyway

                num_q, ok_list, err_list = self.m.info_read()
                for c in ok_list:
                    chunk = self.findChunk(c)
                    try: # check if the header implies success, else add it to failed list
                        chunk.verifyHeader()
                    except BadHeader, e:
                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
                        failed.append(chunk)
                        ex = e
                    else:
                        chunksDone.add(c)

                for c in err_list:
                    curl, errno, msg = c
                    chunk = self.findChunk(curl)
                    #test if chunk was finished
                    if errno != 23 or "0 !=" not in msg:
                        failed.append(chunk)
                        ex = pycurl.error(errno, msg)
                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
                        continue

                    try: # check if the header implies success, else add it to failed list
                        chunk.verifyHeader()
                    except BadHeader, e:
                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
                        failed.append(chunk)
                        ex = e
                    else:
                        chunksDone.add(curl)
                if not num_q: # no more info to get

                    # check if init is not finished so we reset download connections
                    # note that other chunks are closed and everything downloaded with initial connection
                    if failed and init not in failed and init.c not in chunksDone:
                        self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))

                        #list of chunks to clean and remove
                        to_clean = filter(lambda x: x is not init, self.chunks)
                        for chunk in to_clean:
                            self.closeChunk(chunk)
                            self.chunks.remove(chunk)
                            remove(fs_encode(self.info.getChunkName(chunk.id)))

                        #let first chunk load the rest and update the info file
                        init.resetRange()
                        self.info.clear()
                        self.info.addChunk("%s.chunk0" % self.filename, (0, self.size))
                        self.info.save()
                    elif failed:
                        raise ex

                    lastFinishCheck = t

                    if len(chunksDone) >= len(self.chunks):
                        if len(chunksDone) > len(self.chunks):
                            self.log.warning("Finished download chunks size incorrect, please report bug.")
                        done = True  #all chunks loaded

                    break