def writeDebugReport(self, pyfile):
    """ Writes a debug report for the given pyfile to a zip archive,
    falling back to a plain text file if zipping fails.

    :return: None
    """
    dump_name = "debug_%s_%s.zip" % (pyfile.pluginname, time.strftime("%d-%m-%Y_%H-%M-%S"))
    dump = self.getDebugDump(pyfile)

    try:
        import zipfile

        zip = zipfile.ZipFile(dump_name, "w")

        for f in os.listdir(os.path.join("tmp", pyfile.pluginname)):
            try:
                # avoid encoding errors
                zip.write(os.path.join("tmp", pyfile.pluginname, f), fs_join(pyfile.pluginname, f))
            except Exception:
                pass

        info = zipfile.ZipInfo(fs_join(pyfile.pluginname, "debug_Report.txt"), time.gmtime())
        info.external_attr = 0644 << 16L  #: set -rw-r--r-- permissions on the zip member
        zip.writestr(info, dump)
        zip.close()

        if not os.stat(dump_name).st_size:
            raise Exception("Empty Zipfile")

    except Exception, e:
        self.m.log.debug("Error creating zip file: %s" % e)
        dump_name = dump_name.replace(".zip", ".txt")
        with open(dump_name, "wb") as f:
            f.write(dump)

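# A minimal, self-contained sketch of the ZipInfo permission trick used above
# (archive name and member content are made up): the upper 16 bits of
# external_attr carry the Unix mode, so shifting 0644 left by 16 marks the
# member as -rw-r--r--.
import time
import zipfile

info = zipfile.ZipInfo("report/debug_Report.txt", time.gmtime())
info.external_attr = 0644 << 16L  #: Unix mode bits live in the high 16 bits
with zipfile.ZipFile("example_dump.zip", "w") as zf:
    zf.writestr(info, "dump contents here")
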
def downloads():
    root = PYLOAD.getConfigValue("general", "download_folder")

    if not os.path.isdir(root):
        return base([_('Download directory not found.')])

    data = {'folder': [], 'files': []}

    items = os.listdir(fs_encode(root))

    for item in sorted([fs_decode(x) for x in items]):
        if os.path.isdir(fs_join(root, item)):
            folder = {'name': item, 'path': item, 'files': []}
            files = os.listdir(fs_join(root, item))
            for file in sorted([fs_decode(x) for x in files]):
                try:
                    if os.path.isfile(fs_join(root, item, file)):
                        folder['files'].append(file)
                except Exception:
                    pass

            data['folder'].append(folder)

        elif os.path.isfile(os.path.join(root, item)):
            data['files'].append(item)

    return render_to_response('downloads.html', {'files': data}, [pre_processor])

def list(self, password=None): command = "vb" if self.fullpath else "lb" p = self.call_cmd(command, "-v", fs_encode(self.filename), password=password) out, err = p.communicate() if "Cannot open" in err: raise ArchiveError(_("Cannot open file")) if err.strip(): #: only log error at this point self.manager.logError(err.strip()) result = set() if not self.fullpath and self.VERSION.startswith('5'): # NOTE: Unrar 5 always list full path for f in fs_decode(out).splitlines(): f = fs_join(self.out, os.path.basename(f.strip())) if os.path.isfile(f): result.add(fs_join(self.out, os.path.basename(f))) else: for f in fs_decode(out).splitlines(): f = f.strip() result.add(fs_join(self.out, f)) return list(result)
def packageFinished(self, pypack):
    download_folder = fs_join(self.config.get("general", "download_folder"), pypack.folder, "")

    for link in pypack.getChildren().itervalues():
        file_type = splitext(link['name'])[1][1:].lower()

        if file_type not in self.formats:
            continue

        hash_file = fs_encode(fs_join(download_folder, link['name']))
        if not isfile(hash_file):
            self.logWarning(_("File not found"), link['name'])
            continue

        with open(hash_file) as f:
            text = f.read()

        for m in re.finditer(self.regexps.get(file_type, self.regexps['default']), text):
            data = m.groupdict()
            self.logDebug(link['name'], data)

            local_file = fs_encode(fs_join(download_folder, data['NAME']))
            algorithm = self.methods.get(file_type, file_type)
            checksum = computeChecksum(local_file, algorithm)

            if checksum == data['HASH']:
                self.logInfo(_('File integrity of "%s" verified by %s checksum (%s)') %
                             (data['NAME'], algorithm, checksum))
            else:
                self.logWarning(_("%s checksum for file %s does not match (%s != %s)") %
                                (algorithm, data['NAME'], checksum, data['HASH']))

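# A hedged sketch of what a helper like computeChecksum() above may boil down
# to: hashing a file in fixed-size chunks with hashlib so large downloads are
# never read into memory at once. The function name and chunk size here are
# assumptions for illustration, not pyLoad's actual implementation.
import hashlib

def compute_checksum_sketch(local_file, algorithm="md5", chunk_size=128 * 1024):
    h = hashlib.new(algorithm)  #: e.g. "md5", "sha1", "sha256"
    with open(local_file, "rb") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            h.update(chunk)
    return h.hexdigest()

# Usage: compute_checksum_sketch("file.bin", "sha1") == data['HASH']
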
def downloadFinished(self, pyfile):
    if self.core.config.get("general", "folder_per_package"):
        download_folder = fs_join(self.core.config.get("general", "download_folder"), pyfile.package().folder)
    else:
        download_folder = self.core.config.get("general", "download_folder")

    for script in self.scripts['download_finished']:
        file = fs_join(download_folder, pyfile.name)
        self.callScript(script, pyfile.id, pyfile.name, file, pyfile.pluginname, pyfile.url)

def packageFinished(self, pack):
    files = {}
    fid_dict = {}
    for fid, data in pack.getChildren().iteritems():
        if re.search(r"\.\d{3}$", data['name']):
            if data['name'][:-4] not in files:
                files[data['name'][:-4]] = []
            files[data['name'][:-4]].append(data['name'])
            files[data['name'][:-4]].sort()
            fid_dict[data['name']] = fid

    download_folder = self.config.get("general", "download_folder")

    if self.config.get("general", "folder_per_package"):
        download_folder = fs_join(download_folder, pack.folder)

    for name, file_list in files.iteritems():
        self.logInfo(_("Starting merging of"), name)

        with open(fs_join(download_folder, name), "wb") as final_file:
            for splitted_file in file_list:
                self.logDebug("Merging part", splitted_file)

                pyfile = self.core.files.getFile(fid_dict[splitted_file])
                pyfile.setStatus("processing")

                try:
                    with open(fs_join(download_folder, splitted_file), "rb") as s_file:
                        size_written = 0
                        s_file_size = int(os.path.getsize(os.path.join(download_folder, splitted_file)))
                        while True:
                            f_buffer = s_file.read(self.BUFFER_SIZE)
                            if f_buffer:
                                final_file.write(f_buffer)
                                size_written += len(f_buffer)  #: count actual bytes read, not BUFFER_SIZE
                                pyfile.setProgress((size_written * 100) / s_file_size)
                            else:
                                break
                    self.logDebug("Finished merging part", splitted_file)

                except Exception:
                    traceback.print_exc()

                finally:
                    pyfile.setProgress(100)
                    pyfile.setStatus("finished")
                    pyfile.release()

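# A minimal, self-contained sketch of the grouping rule above: split archives
# named "movie.avi.001", "movie.avi.002", ... are keyed by the name minus the
# 4-character ".NNN" suffix and merged in sorted order. File names are made up.
import re

parts = ["movie.avi.002", "movie.avi.001", "readme.txt"]
groups = {}
for p in parts:
    if re.search(r"\.\d{3}$", p):
        groups.setdefault(p[:-4], []).append(p)
for base, plist in groups.items():
    plist.sort()
    print base, "<-", plist  #: movie.avi <- ['movie.avi.001', 'movie.avi.002']
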
def _copyChunks(self):
    init = fs_encode(self.info.getChunkName(0))  #: initial chunk name

    if self.info.getCount() > 1:
        with open(init, "rb+") as fo:  #: first chunkfile
            for i in xrange(1, self.info.getCount()):
                # input file
                fo.seek(self.info.getChunkRange(i - 1)[1] + 1)  #: seek to beginning of chunk, to get rid of overlapping chunks
                fname = fs_encode("%s.chunk%d" % (self.filename, i))
                with open(fname, "rb") as fi:
                    buf = 32 * 1024
                    while True:  #: copy in chunks, consumes less memory
                        data = fi.read(buf)
                        if not data:
                            break
                        fo.write(data)

                if fo.tell() < self.info.getChunkRange(i)[1]:
                    os.remove(init)  #: was garbled as `reshutil.move(init)` in the source
                    self.info.remove()  #: there are probably invalid chunks
                    raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")

                os.remove(fname)  #: remove chunk

    if self.nameDisposition and self.disposition:
        self.filename = fs_join(os.path.dirname(self.filename), self.nameDisposition)

    shutil.move(init, fs_encode(self.filename))
    self.info.remove()  #: remove info file

def checkForSameFiles(self, starting=False):
    """ Checks if the same file was or is being downloaded within the same package

    :param starting: indicates that the current download is going to start
    :raises SkipDownload:
    """
    pack = self.pyfile.package()

    for pyfile in self.core.files.cache.values():
        if pyfile != self.pyfile and pyfile.name == self.pyfile.name and pyfile.package().folder == pack.folder:
            if pyfile.status in (0, 12):  #: finished or downloading
                raise SkipDownload(pyfile.pluginname)
            elif pyfile.status in (5, 7) and starting:  #: a download is waiting/starting and was apparently started before
                raise SkipDownload(pyfile.pluginname)

    download_folder = self.core.config.get("general", "download_folder")
    location = fs_join(download_folder, pack.folder, self.pyfile.name)

    if starting and self.core.config.get("download", "skip_existing") and os.path.exists(location):
        size = os.stat(location).st_size
        if size >= self.pyfile.size:
            raise SkipDownload("File exists")

    pyfile = self.core.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name)
    if pyfile:
        if os.path.exists(location):
            raise SkipDownload(pyfile[0])

        self.logDebug("File %s not skipped, because it does not exist." % self.pyfile.name)

def package_extracted(self, pypack):
    if self.core.config.get("general", "folder_per_package"):
        download_folder = fs_join(self.core.config.get("general", "download_folder"), pypack.folder)
    else:
        download_folder = self.core.config.get("general", "download_folder")

    for script in self.scripts['package_extracted']:
        self.callScript(script, pypack.id, pypack.name, download_folder)

def packageDeleted(self, pid):
    pack = self.core.api.getPackageInfo(pid)

    if self.core.config.get("general", "folder_per_package"):
        download_folder = fs_join(self.core.config.get("general", "download_folder"), pack.folder)
    else:
        download_folder = self.core.config.get("general", "download_folder")

    for script in self.scripts['package_deleted']:
        self.callScript(script, pack.id, pack.name, download_folder, pack.password)

def getDeleteFiles(self):
    dir, name = os.path.split(self.filename)

    # the actually extracted file
    files = [self.filename]

    # any multipart files that belong to the same archive
    files.extend(fs_join(dir, os.path.basename(file))
                 for file in filter(self.isMultipart, os.listdir(dir))
                 if re.sub(self.re_multipart, ".rar", name) == re.sub(self.re_multipart, ".rar", file))

    return files

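# A hedged illustration of the multipart matching above: every ".partNN.rar"
# suffix is normalized to a plain ".rar" so all volumes of one archive compare
# equal. The regex here is an assumption standing in for self.re_multipart.
import re

re_multipart = re.compile(r'\.part(\d+)\.rar$', re.I)

names = ["movie.part1.rar", "movie.part2.rar", "other.part1.rar"]
base = re.sub(re_multipart, ".rar", "movie.part1.rar")  #: -> "movie.rar"
volumes = [n for n in names if re.sub(re_multipart, ".rar", n) == base]
print volumes  #: ['movie.part1.rar', 'movie.part2.rar']
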
def decrypt(self, pyfile):
    m = re.match(self.__pattern, pyfile.url)
    m_id = m.group('ID')
    m_type = m.group('TYPE')

    if m_type == "user":
        self.logDebug("Url recognized as Channel")
        user = m_id
        channel = self.getChannel(user)

        if channel:
            playlists = self.getPlaylists(channel['id'])
            self.logDebug("%s playlist(s) found on channel \"%s\"" % (len(playlists), channel['title']))

            relatedplaylist = dict((p_name, self.getPlaylist(p_id))
                                   for p_name, p_id in channel['relatedPlaylists'].iteritems())
            self.logDebug("Channel's related playlists found = %s" % relatedplaylist.keys())

            relatedplaylist['uploads']['title'] = "Unplaylisted videos"
            relatedplaylist['uploads']['checkDups'] = True  #: checkDups flag

            for p_name, p_data in relatedplaylist.iteritems():
                if self.getConfig(p_name):
                    p_data['title'] += " of " + user
                    playlists.append(p_data)
        else:
            playlists = []
    else:
        self.logDebug("Url recognized as Playlist")
        playlists = [self.getPlaylist(m_id)]

    if not playlists:
        self.fail(_("No playlist available"))

    addedvideos = []
    urlize = lambda x: "https://www.youtube.com/watch?v=" + x
    for p in playlists:
        p_name = p['title']
        p_videos = self.getVideosId(p['id'])
        p_folder = fs_join(self.config.get("general", "download_folder"), p['channelTitle'], p_name)
        self.logDebug("%s video(s) found on playlist \"%s\"" % (len(p_videos), p_name))

        if not p_videos:
            continue
        elif "checkDups" in p:
            p_urls = [urlize(v_id) for v_id in p_videos if v_id not in addedvideos]
            self.logDebug("%s video(s) available on playlist \"%s\" after duplicates cleanup" % (len(p_urls), p_name))
        else:
            p_urls = map(urlize, p_videos)

        self.packages.append((p_name, p_urls, p_folder))  #: folder is NOT recognized by pyload 0.4.9!

        addedvideos.extend(p_videos)

def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=False):
    """Downloads the content at url to the download folder

    :param url:
    :param get:
    :param post:
    :param ref:
    :param cookies:
    :param disposition: if True and the server provides a content-disposition header,
                        the filename will be changed if needed
    :return: The location where the file was saved
    """
    if self.pyfile.abort:
        self.abort()

    if not url:
        self.fail(_("No url given"))

    url = urllib.unquote(encode(url).strip())

    if self.core.debug:
        self.logDebug("Download url: " + url,
                      *["%s=%s" % (key, val) for key, val in locals().iteritems()
                        if key not in ("self", "url")])

    self.checkForSameFiles()

    self.pyfile.setStatus("downloading")

    if disposition:
        self.pyfile.name = urlparse.urlparse(url).path.split('/')[-1] or self.pyfile.name

    download_folder = self.core.config.get("general", "download_folder")

    location = fs_join(download_folder, self.pyfile.package().folder)

    if not os.path.exists(location):
        try:
            os.makedirs(location, int(self.core.config.get("permission", "folder"), 8))

            if self.core.config.get("permission", "change_dl") and os.name != "nt":
                uid = pwd.getpwnam(self.core.config.get("permission", "user"))[2]
                gid = grp.getgrnam(self.core.config.get("permission", "group"))[2]
                os.chown(location, uid, gid)

        except Exception, e:
            self.fail(e)

def list(self, password=None): command = "l" if self.fullpath else "l" p = self.call_cmd(command, fs_encode(self.filename), password=password) out, err = p.communicate() if "Can not open" in err: raise ArchiveError(_("Cannot open file")) if p.returncode > 1: raise ArchiveError(_("Process return code: %d") % p.returncode) result = set() for groups in self.re_filelist.findall(out): f = groups[-1].strip() result.add(fs_join(self.out, f)) return list(result)
def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False,
         follow_location=True, save_cookies=True):
    """Load content at url and returns it

    :param url:
    :param get:
    :param post:
    :param ref:
    :param cookies:
    :param just_header: If True only the header will be retrieved and returned as dict
    :param decode: Whether to decode the output according to http header, should be True in most cases
    :param follow_location: If True follow location else not
    :param save_cookies: If True saves received cookies else discard them
    :return: Loaded content
    """
    if self.pyfile.abort:
        self.abort()

    if not url:
        self.fail(_("No url given"))

    url = urllib.unquote(encode(url).strip())  #@NOTE: utf8 vs decode -> please use decode attribute in all future plugins

    if self.core.debug:
        self.logDebug("Load url: " + url,
                      *["%s=%s" % (key, val) for key, val in locals().iteritems()
                        if key not in ("self", "url")])

    res = self.req.load(url, get, post, ref, cookies, just_header,
                        decode=decode, follow_location=follow_location, save_cookies=save_cookies)

    if decode:
        res = encode(res)

    if self.core.debug:
        import inspect

        frame = inspect.currentframe()
        framefile = fs_join("tmp", self.getClassName(),
                            "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno))
        try:
            if not os.path.exists(os.path.join("tmp", self.getClassName())):
                os.makedirs(os.path.join("tmp", self.getClassName()))

            with open(framefile, "wb") as f:
                del frame  #: delete the frame or it won't be cleaned up
                f.write(res)

        except IOError, e:
            self.logError(e)

def handleDLCLinks(self):
    self.logDebug("Search for DLC links")

    package_links = []

    m = re.search(self.DLC_LINK_REGEX, self.html)
    if m:
        container_url = self.DLC_DOWNLOAD_URL + "?id=%s&dlc=1" % self.fileid
        self.logDebug("Downloading DLC container link [%s]" % container_url)
        try:
            dlc = self.load(container_url)
            dlc_filename = self.fileid + ".dlc"
            dlc_filepath = fs_join(self.config.get("general", "download_folder"), dlc_filename)
            with open(dlc_filepath, "wb") as f:
                f.write(dlc)
            package_links.append(dlc_filepath)

        except Exception:
            self.fail(_("Unable to download DLC container"))

    return package_links

def decrypt(self, pyfile):
    fs_filename = fs_encode(pyfile.url.strip())
    opener = urllib2.build_opener(MultipartPostHandler.MultipartPostHandler)

    dlc_content = opener.open('http://service.jdownloader.net/dlcrypt/getDLC.php',
                              {'src'     : "ccf",
                               'filename': "test.ccf",
                               'upload'  : open(fs_filename, "rb")}).read()

    download_folder = self.config.get("general", "download_folder")
    dlc_file = fs_join(download_folder, "tmp_%s.dlc" % pyfile.name)

    try:
        dlc = re.search(r'<dlc>(.+)</dlc>', dlc_content, re.S).group(1).decode('base64')
    except AttributeError:
        self.fail(_("Container is corrupted"))

    with open(dlc_file, "w") as tempdlc:
        tempdlc.write(dlc)

    self.urls = [dlc_file]

def decrypt(self, pyfile):
    m = re.match(self.__pattern, pyfile.url)
    m_id = m.group('ID')
    m_type = m.group('TYPE')

    if m_type == "playlist":
        self.logDebug("Url recognized as Playlist")
        p_info = self.getPlaylistInfo(m_id)
        playlists = [(m_id,) + p_info] if p_info else None
    else:
        self.logDebug("Url recognized as Channel")
        playlists = self.getPlaylists(m_id)
        self.logDebug("%s playlist(s) found on channel \"%s\"" % (len(playlists), m_id))

    if not playlists:
        self.fail(_("No playlist available"))

    for p_id, p_name, p_owner in playlists:
        p_videos = self.getVideos(p_id)
        p_folder = fs_join(self.config.get("general", "download_folder"), p_owner, p_name)
        self.logDebug("%s video(s) found on playlist \"%s\"" % (len(p_videos), p_name))
        self.packages.append((p_name, p_videos, p_folder))  #: folder is NOT recognized by pyload 0.4.9!

def periodical(self):
    folder = fs_encode(self.getConfig('folder'))
    file = fs_encode(self.getConfig('file'))

    try:
        if not os.path.isdir(os.path.join(folder, "finished")):
            os.makedirs(os.path.join(folder, "finished"))

        if self.getConfig('watch_file'):
            with open(file, "a+") as f:
                f.seek(0)
                content = f.read().strip()

            if content:
                open(file, "wb").close()  #: truncate the watch file

                name = "%s_%s.txt" % (file, time.strftime("%H-%M-%S_%d%b%Y"))

                with open(fs_join(folder, "finished", name), "wb") as f:
                    f.write(content)

                self.core.api.addPackage(f.name, [f.name], 1)

        for f in os.listdir(folder):
            path = os.path.join(folder, f)

            if not os.path.isfile(path) or f.endswith("~") or f.startswith("#") or f.startswith("."):
                continue

            newpath = os.path.join(folder, "finished", f if self.getConfig('keep') else "tmp_" + f)
            shutil.move(path, newpath)

            self.logInfo(_("Added %s from HotFolder") % f)
            self.core.api.addPackage(f, [newpath], 1)

    except (IOError, OSError), e:
        self.logError(e)

def removePlugins(self, type_plugins):
    """ Delete plugins from disk """
    if not type_plugins:
        return

    removed = set()

    self.logDebug("Requested deletion of plugins: %s" % type_plugins)

    for type, name in type_plugins:
        rootplugins = os.path.join(pypath, "module", "plugins")

        for dir in ("userplugins", rootplugins):
            py_filename = fs_join(dir, type, name + ".py")
            pyc_filename = py_filename + "c"

            if type == "addon":
                try:
                    self.manager.deactivateAddon(name)
                except Exception, e:
                    self.logDebug(e)

            for filename in (py_filename, pyc_filename):
                if not os.path.exists(filename):
                    continue

                try:
                    os.remove(filename)
                except OSError, e:
                    self.logError(_("Error removing: %s") % filename, e)
                else:
                    id = (type, name)
                    removed.add(id)

class ExternalScripts(Addon):
    __name = "ExternalScripts"
    __type = "addon"
    __version = "0.39"

    __config = [("activated", "bool", "Activated"         , True ),
                ("waitend"  , "bool", "Wait script ending", False)]

    __description = """Run external scripts"""
    __license = "GPLv3"
    __authors = [("mkaay"         , "*****@*****.**"),
                 ("RaNaN"         , "*****@*****.**"),
                 ("spoob"         , "*****@*****.**"),
                 ("Walter Purcaro", "*****@*****.**")]

    event_list = ["archive_extract_failed", "archive_extracted",
                  "package_extract_failed", "package_extracted",
                  "all_archives_extracted", "all_archives_processed",
                  "allDownloadsFinished", "allDownloadsProcessed",
                  "packageDeleted"]

    def setup(self):
        self.info['oldip'] = None
        self.scripts = {}

        folders = ["pyload_start", "pyload_restart", "pyload_stop",
                   "before_reconnect", "after_reconnect",
                   "download_preparing", "download_failed", "download_finished",
                   "archive_extract_failed", "archive_extracted",
                   "package_finished", "package_deleted", "package_extract_failed", "package_extracted",
                   "all_downloads_processed", "all_downloads_finished",  #@TODO: Invert `all_downloads_processed`, `all_downloads_finished` order in 0.4.10
                   "all_archives_extracted", "all_archives_processed"]

        for folder in folders:
            self.scripts[folder] = []
            for dir in (pypath, ''):
                self.initPluginType(folder, os.path.join(dir, 'scripts', folder))

        for script_type, names in self.scripts.iteritems():
            if names:
                self.logInfo(_("Installed scripts for: ") + script_type, ", ".join(map(os.path.basename, names)))

        self.pyload_start()

    def initPluginType(self, name, dir):
        if not os.path.isdir(dir):
            try:
                os.makedirs(dir)
            except OSError, e:
                self.logDebug(e)
                return

        for filename in os.listdir(dir):
            file = fs_join(dir, filename)

            if not os.path.isfile(file):
                continue

            if filename[0] in ("#", "_") or filename.endswith("~") or filename.endswith(".swp"):
                continue

            if not os.access(file, os.X_OK):
                self.logWarning(_("Script not executable:") + " %s/%s" % (name, filename))

            self.scripts[name].append(file)

    # (tail of the preceding method in the original source)
    self.createPackages()

def loadToDisk(self):
    """Loads the container to disk if it is stored remotely and overwrites the url,
    otherwise checks for its existence at several places on disk"""
    if self.pyfile.url.startswith("http"):
        self.pyfile.name = re.findall(r"([^\/=]+)", self.pyfile.url)[-1]
        content = self.load(self.pyfile.url)
        self.pyfile.url = fs_join(self.core.config.get("general", "download_folder"), self.pyfile.name)
        try:
            with open(self.pyfile.url, "wb") as f:
                f.write(content)
        except IOError, e:
            self.fail(str(e))

    else:
        self.pyfile.name = os.path.basename(self.pyfile.url)
        if not os.path.exists(self.pyfile.url):
            if os.path.exists(fs_join(pypath, self.pyfile.url)):
                self.pyfile.url = fs_join(pypath, self.pyfile.url)
            else:
                self.fail(_("File does not exist"))

def deleteTmp(self):
    if self.pyfile.name.startswith("tmp_"):
        os.remove(self.pyfile.url)

def _updatePlugins(self, data):
    """ Check for plugin updates """
    exitcode = 0
    updated = []

    url = data[0]
    schema = data[1].split('|')

    VERSION = re.compile(r'__version.*=.*("|\')([\d.]+)')

    if "BLACKLIST" in data:
        blacklist = data[data.index('BLACKLIST') + 1:]
        updatelist = data[2:data.index('BLACKLIST')]
    else:
        blacklist = []
        updatelist = data[2:]

    updatelist = [dict(zip(schema, x.split('|'))) for x in updatelist]
    blacklist = [dict(zip(schema, x.split('|'))) for x in blacklist]

    if blacklist:
        type_plugins = [(plugin['type'], plugin['name'].rsplit('.', 1)[0]) for plugin in blacklist]

        # Protect UpdateManager from removing itself
        try:
            type_plugins.remove(("addon", "UpdateManager"))
        except ValueError:
            pass

        for t, n in type_plugins:
            for idx, plugin in enumerate(updatelist):
                if n == plugin['name'] and t == plugin['type']:
                    updatelist.pop(idx)
                    break

        for t, n in self.removePlugins(sorted(type_plugins)):
            self.logInfo(_("Removed blacklisted plugin: [%(type)s] %(name)s") % {
                'type': t,
                'name': n,
            })

    for plugin in sorted(updatelist, key=operator.itemgetter("type", "name")):
        filename = plugin['name']
        type = plugin['type']
        version = plugin['version']

        if filename.endswith(".pyc"):
            name = filename[:filename.find("_")]
        else:
            name = filename.replace(".py", "")

        plugins = getattr(self.core.pluginManager, "%sPlugins" % type)

        oldver = float(plugins[name]['version']) if name in plugins else None
        newver = float(version)

        if not oldver:
            msg = "New plugin: [%(type)s] %(name)s (v%(newver).2f)"
        elif newver > oldver:
            msg = "New version of plugin: [%(type)s] %(name)s (v%(oldver).2f -> v%(newver).2f)"
        else:
            continue

        self.logInfo(_(msg) % {'type': type, 'name': name, 'oldver': oldver, 'newver': newver})

        try:
            content = getURL(url % plugin)
            m = VERSION.search(content)

            if m and m.group(2) == version:
                with open(fs_join("userplugins", type, filename), "wb") as f:
                    f.write(content)

                updated.append((type, name))
            else:
                raise Exception(_("Version mismatch"))

        except Exception, e:
            self.logError(_("Error updating plugin: %s") % filename, e)

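# Note that the float() comparison above breaks down for versions like
# "0.4.10" (float("0.4.10") raises ValueError) and would order "0.10" below
# "0.9". A hedged sketch of a more robust, tuple-based comparison (an
# alternative, not what the code above ships):
def version_tuple(v):
    return tuple(int(part) for part in v.split("."))

assert version_tuple("0.10") > version_tuple("0.9")
assert version_tuple("0.4.10") > version_tuple("0.4.9")
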
def doDownload(self, url):
    self.pyfile.setStatus("waiting")  #: wait for the real link

    m = re.match(r'xdcc://(.*?)/#?(.*?)/(.*?)/#?(\d+)/?', url)
    server = m.group(1)
    chan = m.group(2)
    bot = m.group(3)
    pack = m.group(4)
    nick = self.getConfig('nick')
    ident = self.getConfig('ident')
    real = self.getConfig('realname')

    temp = server.split(':')
    ln = len(temp)
    if ln == 2:
        host, port = temp
    elif ln == 1:
        host, port = temp[0], 6667
    else:
        self.fail(_("Invalid hostname for IRC Server: %s") % server)

    #######################
    # CONNECT TO IRC AND IDLE FOR REAL LINK
    dl_time = time.time()

    sock = socket.socket()
    sock.connect((host, int(port)))
    if nick == "pyload":
        nick = "pyload-%d" % (time.time() % 1000)  #: last 3 digits
    sock.send("NICK %s\r\n" % nick)
    sock.send("USER %s %s bla :%s\r\n" % (ident, host, real))
    self.setWait(3)
    self.wait()
    sock.send("JOIN #%s\r\n" % chan)
    sock.send("PRIVMSG %s :xdcc send #%s\r\n" % (bot, pack))

    # IRC recv loop
    readbuffer = ""
    done = False
    retry = None
    m = None
    while True:
        # done is set if we got our real link
        if done:
            break

        if retry:
            if time.time() > retry:
                retry = None
                dl_time = time.time()
                sock.send("PRIVMSG %s :xdcc send #%s\r\n" % (bot, pack))
        else:
            if (dl_time + self.timeout) < time.time():  #@TODO: add in config
                sock.send("QUIT :byebye\r\n")
                sock.close()
                self.fail(_("XDCC Bot did not answer"))

        fdset = select.select([sock], [], [], 0)
        if sock not in fdset[0]:
            continue

        readbuffer += sock.recv(1024)
        temp = readbuffer.split("\n")
        readbuffer = temp.pop()

        for line in temp:
            if self.debug == 2:
                print "*> " + unicode(line, errors='ignore')
            line = line.rstrip()
            first = line.split()

            if first[0] == "PING":
                sock.send("PONG %s\r\n" % first[1])

            if first[0] == "ERROR":
                self.fail(_("IRC-Error: %s") % line)

            msg = line.split(None, 3)
            if len(msg) != 4:
                continue

            msg = {
                "origin": msg[0][1:],
                "action": msg[1],
                "target": msg[2],
                "text"  : msg[3][1:]
            }

            if nick == msg['target'][0:len(nick)] and "PRIVMSG" == msg['action']:
                if msg['text'] == "\x01VERSION\x01":
                    self.logDebug("Sending CTCP VERSION")
                    sock.send("NOTICE %s :%s\r\n" % (msg['origin'], "pyLoad! IRC Interface"))
                elif msg['text'] == "\x01TIME\x01":
                    self.logDebug("Sending CTCP TIME")
                    sock.send("NOTICE %s :%d\r\n" % (msg['origin'], time.time()))
                elif msg['text'] == "\x01LAG\x01":
                    pass  #: don't know how to answer

            if not (bot == msg['origin'][0:len(bot)]
                    and nick == msg['target'][0:len(nick)]
                    and msg['action'] in ("PRIVMSG", "NOTICE")):
                continue

            if self.debug == 1:
                print "%s: %s" % (msg['origin'], msg['text'])

            if "You already requested that pack" in msg['text']:
                retry = time.time() + 300

            if "you must be on a known channel to request a pack" in msg['text']:
                self.fail(_("Wrong channel"))

            m = re.match('\x01DCC SEND (.*?) (\d+) (\d+)(?: (\d+))?\x01', msg['text'])
            if m:
                done = True

    # get connection data
    ip = socket.inet_ntoa(struct.pack('L', socket.ntohl(int(m.group(2)))))
    port = int(m.group(3))
    packname = m.group(1)

    if m.group(4):  #: the filesize group is optional in the DCC SEND offer
        self.req.filesize = int(m.group(4))

    self.pyfile.name = packname

    download_folder = self.config.get("general", "download_folder")
    filename = fs_join(download_folder, packname)

    self.logInfo(_("Downloading %s from %s:%d") % (packname, ip, port))

    self.pyfile.setStatus("downloading")
    newname = self.req.download(ip, port, filename, sock, self.pyfile.setProgress)
    if newname and newname != filename:
        self.logInfo(_("%(name)s saved as %(newname)s") % {"name": self.pyfile.name, "newname": newname})
        filename = newname

    # kill IRC socket
    # sock.send("QUIT :byebye\r\n")
    sock.close()

    self.lastDownload = filename
    return self.lastDownload

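# A hedged sketch of decoding the integer ip from a DCC SEND offer portably:
# struct.pack('L') above uses the platform's native size (8 bytes on many
# 64-bit builds), which inet_ntoa rejects; '!I' always yields 4 big-endian
# bytes, so no ntohl() round-trip is needed. The example value is made up.
import socket
import struct

def dcc_ip(ip_int):
    return socket.inet_ntoa(struct.pack('!I', ip_int))

print dcc_ip(3232235777)  #: 192.168.1.1
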
class OCR(Base):
    __name = "OCR"
    __type = "ocr"
    __version = "0.12"

    __description = """OCR base plugin"""
    __license = "GPLv3"
    __authors = [("pyLoad Team", "*****@*****.**")]

    def __init__(self):
        self.logger = logging.getLogger("log")

    def load_image(self, image):
        self.image = Image.open(image)
        self.pixels = self.image.load()
        self.result_captcha = ''

    def deactivate(self):
        """Delete all tmp images"""
        pass

    def threshold(self, value):
        self.image = self.image.point(lambda a: a * value + 10)

    def run(self, command):
        """Run a command"""
        popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        popen.wait()
        output = popen.stdout.read() + " | " + popen.stderr.read()
        popen.stdout.close()
        popen.stderr.close()
        self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output))

    def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None):
        # tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
        try:
            tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.__class__.__name__), "wb")
            tmpTif.close()

            # tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
            tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.__class__.__name__), "wb")
            tmpTxt.close()

        except IOError, e:
            self.logError(e)
            return

        self.logger.debug("save tiff")
        self.image.save(tmpTif.name, 'TIFF')

        if os.name == "nt":
            tessparams = [os.path.join(pypath, "tesseract", "tesseract.exe")]
        else:
            tessparams = ["tesseract"]

        tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")])

        if pagesegmode:
            tessparams.extend(["-psm", str(pagesegmode)])

        if subset and (digits or lowercase or uppercase):
            # tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
            with open(fs_join("tmp", "tmpSub_%s.subset" % self.__class__.__name__), "wb") as tmpSub:
                tmpSub.write("tessedit_char_whitelist ")

                if digits:
                    tmpSub.write("0123456789")
                if lowercase:
                    tmpSub.write("abcdefghijklmnopqrstuvwxyz")
                if uppercase:
                    tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

                tmpSub.write("\n")
                tessparams.append("nobatch")
                tessparams.append(os.path.abspath(tmpSub.name))

        self.logger.debug("run tesseract")
        self.run(tessparams)
        self.logger.debug("read txt")

        try:
            with open(tmpTxt.name, 'r') as f:
                self.result_captcha = f.read().replace("\n", "")
        except Exception:
            self.result_captcha = ""

        self.logger.debug(self.result_captcha)

        try:
            os.remove(tmpTif.name)
            os.remove(tmpTxt.name)
            if subset and (digits or lowercase or uppercase):
                os.remove(tmpSub.name)
        except Exception:
            pass

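# A hedged usage sketch of the OCR class above (the captcha image path is
# hypothetical, and a concrete subclass with logError() is assumed in
# practice): load_image() reads the image with PIL, threshold() boosts
# contrast, and run_tesser() shells out to tesseract restricted to digits.
ocr = OCR()
ocr.load_image("captcha.png")
ocr.threshold(1.5)
ocr.run_tesser(subset=True, digits=True, lowercase=False, uppercase=False)
print ocr.result_captcha
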
def extract(self, ids, thread=None):  #@TODO: Use pypack, not pid to improve method usability
    if not ids:
        return False

    self.extracting = True

    processed = []
    extracted = []
    failed = []

    toList = lambda string: string.replace(' ', '').replace(',', '|').replace(';', '|').split('|')

    destination = self.getConfig('destination')
    subfolder = self.getConfig('subfolder')
    fullpath = self.getConfig('fullpath')
    overwrite = self.getConfig('overwrite')
    renice = self.getConfig('renice')
    recursive = self.getConfig('recursive')
    delete = self.getConfig('delete')
    keepbroken = self.getConfig('keepbroken')

    extensions = [x.lstrip('.').lower() for x in toList(self.getConfig('extensions'))]
    excludefiles = toList(self.getConfig('excludefiles'))

    if extensions:
        self.logDebug("Use for extensions: %s" % "|.".join(extensions))

    # reload passwords from the txt file
    self.reloadPasswords()

    download_folder = self.config.get("general", "download_folder")

    # iterate packages -> extractors -> targets
    for pid in ids:
        pypack = self.core.files.getPackage(pid)

        if not pypack:
            self.queue.remove(pid)
            continue

        self.logInfo(_("Check package: %s") % pypack.name)

        # determine output folder
        out = fs_join(download_folder, pypack.folder, destination, "")  #: force trailing slash

        if subfolder:
            out = fs_join(out, pypack.folder)

        if not os.path.exists(out):
            os.makedirs(out)

        matched = False
        success = True
        files_ids = dict((pylink['name'], ((fs_join(download_folder, pypack.folder, pylink['name'])), pylink['id'], out))
                         for pylink in sorted(pypack.getChildren().itervalues(), key=lambda k: k['name'])).values()  #: remove duplicates

        # check as long as there are unseen files
        while files_ids:
            new_files_ids = []

            if extensions:
                files_ids = [(fname, fid, fout) for fname, fid, fout in files_ids
                             if filter(lambda ext: fname.lower().endswith(ext), extensions)]

            for Extractor in self.extractors:
                targets = Extractor.getTargets(files_ids)
                if targets:
                    self.logDebug("Targets for %s: %s" % (Extractor.__name__, targets))
                    matched = True

                for fname, fid, fout in targets:
                    name = os.path.basename(fname)

                    if not os.path.exists(fname):
                        self.logDebug(name, "File not found")
                        continue

                    self.logInfo(name, _("Extract to: %s") % fout)
                    try:
                        pyfile = self.core.files.getFile(fid)
                        archive = Extractor(self, fname, fout, fullpath, overwrite, excludefiles,
                                            renice, delete, keepbroken, fid)

                        thread.addActive(pyfile)
                        archive.init()

                        try:
                            new_files = self._extract(pyfile, archive, pypack.password)
                        finally:
                            pyfile.setProgress(100)
                            thread.finishFile(pyfile)

                    except Exception, e:
                        self.logError(name, e)
                        success = False
                        continue

                    # remove the processed file and related multiparts from the list
                    files_ids = [(fname, fid, fout) for fname, fid, fout in files_ids
                                 if fname not in archive.getDeleteFiles()]
                    self.logDebug("Extracted files: %s" % new_files)
                    self.setPermissions(new_files)

                    for filename in new_files:
                        file = fs_encode(fs_join(os.path.dirname(archive.filename), filename))
                        if not os.path.exists(file):
                            self.logDebug("New file %s does not exist" % filename)
                            continue

                        if recursive and os.path.isfile(file):
                            new_files_ids.append((filename, fid, os.path.dirname(filename)))  #: append as a new target

                    self.manager.dispatchEvent("archive_extracted", pyfile, archive)

            files_ids = new_files_ids  #: also check extracted files

        if matched:
            if success:
                extracted.append(pid)
                self.manager.dispatchEvent("package_extracted", pypack)
            else:
                failed.append(pid)
                self.manager.dispatchEvent("package_extract_failed", pypack)
                self.failed.add(pid)
        else:
            self.logInfo(_("No files found to extract"))

        if not matched or (not success and subfolder):
            try:
                os.rmdir(out)
            except OSError:
                pass

        self.queue.remove(pid)
