def _copy_chunks(self):
    init = fs_encode(self.info.get_chunk_name(0))  #: initial chunk name

    if self.info.get_count() > 1:
        with open(init, "rb+") as fo:  #: first chunkfile
            for i in xrange(1, self.info.get_count()):
                # input file
                fo.seek(
                    self.info.get_chunk_range(i - 1)[1] + 1)  #: seek to beginning of chunk, to get rid of overlapping chunks
                fname = fs_encode("%s.chunk%d" % (self.filename, i))
                with open(fname, "rb") as fi:
                    buf = 32 * 1024
                    while True:  #: copy in chunks, consumes less memory
                        data = fi.read(buf)
                        if not data:
                            break
                        fo.write(data)

                if fo.tell() < self.info.get_chunk_range(i)[1]:
                    os.remove(init)
                    self.info.remove()  #: there are probably invalid chunks
                    raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")

                os.remove(fname)  #: remove chunk

    if self.nameDisposition and self.disposition:
        self.filename = fs_join(os.path.dirname(self.filename), self.nameDisposition)

    shutil.move(init, fs_encode(self.filename))
    self.info.remove()  #: remove info file
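
# The buffered-copy idiom used above, as a minimal standalone sketch: append one
# file onto another in fixed-size blocks so memory use stays constant no matter
# how large the chunk is. The function name and parameters are illustrative,
# not part of the original API.
def append_file(dst_path, src_path, buf=32 * 1024):
    with open(dst_path, "ab") as fo:
        with open(src_path, "rb") as fi:
            while True:
                data = fi.read(buf)
                if not data:
                    break
                fo.write(data)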
def get_download(path):
    path = urllib.unquote(decode(path))
    #@TODO: some files can not be downloaded
    root = API.get_config_value("general", "download_folder")

    path = path.replace("..", "")
    return bottle.static_file(fs_encode(path), fs_encode(root))
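
# How a handler like this is typically wired up in bottle; the route pattern
# below is an assumption for illustration, not taken from the original app.
# bottle.static_file() itself refuses paths that escape the given root, which
# backs up the manual ".." stripping above.
@bottle.route('/download/<path:path>')
def download_route(path):
    return get_download(path)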
def load(name):
    fs_name = fs_encode("%s.chunks" % name)
    if not os.path.exists(fs_name):
        raise IOError()
    fh = codecs.open(fs_name, "r", "utf_8")
    name = fh.readline()[:-1]
    size = fh.readline()[:-1]
    if name.startswith("name:") and size.startswith("size:"):
        name = name[5:]
        size = size[5:]
    else:
        fh.close()
        raise WrongFormat()

    ci = ChunkInfo(name)
    ci.loaded = True
    ci.setSize(size)
    while True:
        if not fh.readline():  #: skip line
            break
        name = fh.readline()[1:-1]
        range = fh.readline()[1:-1]
        if name.startswith("name:") and range.startswith("range:"):
            name = name[5:]
            range = range[6:].split("-")
        else:
            raise WrongFormat()

        ci.addChunk(name, (long(range[0]), long(range[1])))

    fh.close()
    return ci
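
# For reference, the on-disk .chunks layout parsed here (and written by save()
# below) looks like this; names and numbers are example values:
#
#   name:somefile.bin
#   size:2048
#   #0:
#       name:somefile.bin.chunk0
#       range:0-1023
#   #1:
#       name:somefile.bin.chunk1
#       range:1024-2047
#
# The name/range lines are indented with a single tab, which the parser slices
# off together with the trailing newline via readline()[1:-1]; each chunk
# record spans exactly three lines, matching the three readline() calls per
# loop iteration.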
def downloads():
    root = API.get_config_value("general", "download_folder")

    if not os.path.isdir(root):
        return base([_('Download directory not found.')])

    data = {
        'folder': [],
        'files': []
    }

    items = os.listdir(fs_encode(root))

    for item in sorted([fs_decode(x) for x in items]):
        if os.path.isdir(fs_join(root, item)):
            folder = {
                'name': item,
                'path': item,
                'files': []
            }
            files = os.listdir(fs_join(root, item))
            for file in sorted([fs_decode(x) for x in files]):
                try:
                    if os.path.isfile(fs_join(root, item, file)):
                        folder['files'].append(file)
                except Exception:
                    pass

            data['folder'].append(folder)
        elif os.path.isfile(os.path.join(root, item)):
            data['files'].append(item)

    return render_to_response('downloads.html', {'files': data}, [pre_processor])
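
# The context handed to the template has this shape (example values assumed):
#
#   {'folder': [{'name': u'movies', 'path': u'movies', 'files': [u'a.mkv']}],
#    'files': [u'readme.txt']}
#
# Only one directory level is walked, so files nested deeper than a single
# subfolder never show up in the listing.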
def save(self):
    fs_name = fs_encode("%s.chunks" % self.name)
    fh = codecs.open(fs_name, "w", "utf_8")
    fh.write("name:%s\n" % self.name)
    fh.write("size:%s\n" % self.size)
    for i, c in enumerate(self.chunks):
        fh.write("#%d:\n" % i)
        fh.write("\tname:%s\n" % c[0])
        fh.write("\trange:%i-%i\n" % c[1])
    fh.close()
def get_handle(self):
    """Returns a Curl handle ready to use for perform/multiperform"""
    self.set_request_context(self.p.url, self.p.get, self.p.post,
                             self.p.referer, self.p.cj)
    self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
    self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)

    # request all bytes, since some servers in russia seem to have a defective arithmetic unit
    fs_name = fs_encode(self.p.info.get_chunk_name(self.id))
    if self.resume:
        self.fp = open(fs_name, "ab")
        self.arrived = self.fp.tell()
        if not self.arrived:
            self.arrived = os.stat(fs_name).st_size

        if self.range:
            # do nothing if chunk already finished
            if self.arrived + self.range[0] >= self.range[1]:
                return None

            if self.id == len(self.p.info.chunks) - 1:
                #: as last chunk don't set end range, so we get everything
                range = "%i-" % (self.arrived + self.range[0])
            else:
                range = "%i-%i" % (self.arrived + self.range[0],
                                   min(self.range[1] + 1, self.p.size - 1))

            self.log.debug("Chunked resume with range %s" % range)
            self.c.setopt(pycurl.RANGE, range)
        else:
            self.log.debug("Resume File from %i" % self.arrived)
            self.c.setopt(pycurl.RESUME_FROM, self.arrived)
    else:
        if self.range:
            if self.id == len(self.p.info.chunks) - 1:  #: see above
                range = "%i-" % self.range[0]
            else:
                range = "%i-%i" % (self.range[0],
                                   min(self.range[1] + 1, self.p.size - 1))

            self.log.debug("Chunked with range %s" % range)
            self.c.setopt(pycurl.RANGE, range)

        self.fp = open(fs_name, "wb")

    return self.c
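
# The range arithmetic above, pulled out as a standalone sketch (function and
# parameter names are illustrative). HTTP range offsets are inclusive, the
# last chunk sends an open-ended "start-" so no trailing bytes are lost, and
# any bytes already on disk ("arrived") shift the requested start forward.
def build_range(start, end, arrived, is_last, file_size):
    if is_last:
        return "%i-" % (arrived + start)
    return "%i-%i" % (arrived + start, min(end + 1, file_size - 1))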
def check_download(self, rules, api_size=0, max_size=50000, delete=True, read_size=0):
    """ Checks the content of the last downloaded file; the re match is saved to `lastCheck`

    :param rules: dict with names and rules to match (compiled regexp or strings)
    :param api_size: expected file size
    :param max_size: if the file is larger than this it won't be checked
    :param delete: delete if matched
    :param read_size: amount of bytes to read from files larger than max_size
    :return: dictionary key of the first rule that matched
    """
    lastDownload = fs_encode(self.lastDownload)
    if not os.path.exists(lastDownload):
        return None

    size = os.stat(lastDownload)
    size = size.st_size

    if api_size and api_size <= size:
        return None
    elif size > max_size and not read_size:
        return None

    self.log_debug("Download Check triggered")

    with open(lastDownload, "rb") as f:
        content = f.read(read_size if read_size else -1)

    # produces encoding errors, better log to other file in the future?
    # self.log_debug("Content: %s" % content)

    for name, rule in rules.iteritems():
        if isinstance(rule, basestring):
            if rule in content:
                if delete:
                    os.remove(lastDownload)
                return name

        elif hasattr(rule, "search"):
            m = rule.search(content)
            if m:
                if delete:
                    os.remove(lastDownload)
                self.lastCheck = m
                return name
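
# Example use inside a hoster plugin (rule names and patterns are assumed):
# plain strings are substring rules, while anything with a .search() method is
# treated as a compiled regexp whose match object ends up in self.lastCheck.
import re

def check_file(self):
    check = self.check_download({
        'empty file': re.compile(r'\A\s*\Z'),  #: regexp rule
        'html page': '<html',                  #: substring rule
    })
    if check == 'html page':
        self.log_debug("Got an HTML page instead of the file")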
def remove(self):
    fs_name = fs_encode("%s.chunks" % self.name)
    if os.path.exists(fs_name):
        os.remove(fs_name)
def _download(self, chunks, resume):
    if not resume:
        self.info.clear()
        self.info.add_chunk("%s.chunk0" % self.filename, (0, 0))  #: create an initial entry
        self.info.save()

    self.chunks = []

    init = HTTPChunk(0, self, None, resume)  #: initial chunk that will load complete file (if needed)

    self.chunks.append(init)
    self.manager.add_handle(init.get_handle())

    lastFinishCheck = 0
    lastTimeCheck = 0
    chunksDone = set()  #: set of curl handles that are finished
    chunksCreated = False
    done = False
    if self.info.get_count() == 0:  #: this is a resume, if we were chunked originally assume we still can be
        self.chunkSupport = False

    while 1:
        # need to create chunks
        if not chunksCreated and self.chunkSupport and self.size:  #: will be set later by first chunk
            if not resume:
                self.info.set_size(self.size)
                self.info.create_chunks(chunks)
                self.info.save()

            chunks = self.info.get_count()

            init.setRange(self.info.get_chunk_range(0))

            for i in xrange(1, chunks):
                c = HTTPChunk(i, self, self.info.get_chunk_range(i), resume)

                handle = c.get_handle()
                if handle:
                    self.chunks.append(c)
                    self.manager.add_handle(handle)
                else:
                    # close immediately
                    self.log.debug("Invalid curl handle -> closed")
                    c.close()

            chunksCreated = True

        while 1:
            ret, num_handles = self.manager.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break

        t = time.time()

        # reduce these calls
        while lastFinishCheck + 0.5 < t:
            # list of failed curl handles
            failed = []
            ex = None  #: save only last exception, we can only raise one anyway

            num_q, ok_list, err_list = self.manager.info_read()
            for c in ok_list:
                chunk = self.find_chunk(c)
                try:  #: check if the header implies success, else add it to failed list
                    chunk.verifyHeader()
                except BadHeader, e:
                    self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunksDone.add(c)

            for c in err_list:
                curl, errno, msg = c
                chunk = self.find_chunk(curl)

                # test if chunk was finished
                if errno != 23 or "0 !=" not in msg:
                    failed.append(chunk)
                    ex = pycurl.error(errno, msg)
                    self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
                    continue

                try:  #: check if the header implies success, else add it to failed list
                    chunk.verifyHeader()
                except BadHeader, e:
                    self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunksDone.add(curl)

            if not num_q:  #: no more infos to get
                # check if init is not finished so we reset download connections
                # note that other chunks are closed and downloaded with init too
                if failed and init not in failed and init.c not in chunksDone:
                    self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))

                    # list of chunks to clean and remove
                    to_clean = filter(lambda x: x is not init, self.chunks)
                    for chunk in to_clean:
                        self.close_chunk(chunk)
                        self.chunks.remove(chunk)
                        os.remove(fs_encode(self.info.get_chunk_name(chunk.id)))

                    # let first chunk load the rest and update the info file
                    init.resetRange()
                    self.info.clear()
                    self.info.add_chunk("%s.chunk0" % self.filename, (0, self.size))
                    self.info.save()
                elif failed:
                    raise ex

                lastFinishCheck = t

                if len(chunksDone) >= len(self.chunks):
                    if len(chunksDone) > len(self.chunks):
                        self.log.warning("Finished download chunks size incorrect, please report bug.")
                    done = True  #: all chunks loaded

                break
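
# The perform/info_read pattern used above, in isolation: a minimal pycurl
# multi loop driving a single easy handle. The URL and function name are
# placeholders; this is a sketch of the curl-multi protocol, not the original
# download loop. E_CALL_MULTI_PERFORM means "call perform() again right away";
# info_read() drains handles that have finished since the last call.
import pycurl

def fetch(url):
    c = pycurl.Curl()
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.WRITEFUNCTION, lambda data: None)  #: discard the body

    m = pycurl.CurlMulti()
    m.add_handle(c)

    num_handles = 1
    while num_handles:
        while True:
            ret, num_handles = m.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break
        if num_handles:
            m.select(1.0)  #: wait for socket activity instead of busy-looping

    num_q, ok_list, err_list = m.info_read()
    m.remove_handle(c)
    return ok_list, err_list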
try:
    newname = self.req.http_download(url, filename, get=get, post=post, ref=ref,
                                     cookies=cookies, chunks=self.get_chunk_count(),
                                     resume=self.resumeDownload,
                                     progressNotify=self.pyfile.setProgress,
                                     disposition=disposition)
finally:
    self.pyfile.size = self.req.size

if newname:
    newname = urlparse.urlparse(newname).path.split('/')[-1]

    if disposition and newname != name:
        self.log_info(_("%(name)s saved as %(newname)s") % {"name": name, "newname": newname})
        self.pyfile.name = newname
        filename = os.path.join(location, newname)

fs_filename = fs_encode(filename)

if self.core.config.get("permission", "change_file"):
    try:
        os.chmod(fs_filename, int(self.core.config.get("permission", "file"), 8))
    except Exception, e:
        self.log_warning(_("Setting file mode failed"), e)

if self.core.config.get("permission", "change_dl") and os.name != "nt":
    try:
        uid = pwd.getpwnam(self.core.config.get("permission", "user"))[2]
        gid = grp.getgrnam(self.core.config.get("permission", "group"))[2]
        os.chown(fs_filename, uid, gid)
    except Exception, e:
        self.log_warning(_("Setting User and Group failed"), e)
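
# The chmod call above parses the configured mode string as octal: int(x, 8)
# turns the usual "644"/"0755" style values into the numeric mode that
# os.chmod() expects. A quick illustration (values assumed):
#
#   int("644", 8)  == 420  #: rw-r--r--
#   int("0755", 8) == 493  #: rwxr-xr-x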