def download(self):
    __settings__ = xbmcaddon.Addon(id='plugin.video.alfa')  ### Alfa
    xbmcvfs.mkdirs(self.dest_path)
    for libname in get_libname(self.platform):
        dest = os.path.join(self.dest_path, libname)
        log("try to fetch %s" % libname)
        url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'],
                                   self.platform['version'], libname)
        if libname != 'liblibtorrent.so':
            try:
                self.http = HTTP()
                self.http.fetch(url, download=dest + ".zip", progress=False)  ### Alfa
                log("%s -> %s" % (url, dest))
                xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                xbmcvfs.delete(dest + ".zip")
            except:
                text = 'Failed download %s!' % libname
                xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__, text, 750, __icon__))
        else:
            xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest, silent=True)  ### Alfa
        dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Path')),
                                 'lib', libname)  ### Alfa
        xbmcvfs.copy(dest, dest_alfa, silent=True)  ### Alfa
        dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Profile')),
                                 'custom_code', 'lib', libname)  ### Alfa
        xbmcvfs.copy(dest, dest_alfa, silent=True)  ### Alfa
    return True
def __init__(self):
    self.api_key = '1D62F2F90030C444'
    self.cache = Cache('tvdb.db', 1.0)
    self.http = HTTP()
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
        'Cache-Control': 'no-cache',
        'Referer': 'http://www.thetvdb.com/'
    }
def __init__(self):
    self.cache = Cache('kinopoisk.db', 1.0)
    self.html = Clear()
    self.http = HTTP()
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
        'Cache-Control': 'no-cache',
        'Referer': 'http://www.kinopoisk.ru/level/7/'
    }
def __init__(self, language='en'):
    self.api_key = '33DBB309BB2B0ADB'
    dbname = 'tvdb.%s.db' % language
    self.cache = Cache(dbname, 1.0)
    self.language = language
    self.http = HTTP()
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
        'Cache-Control': 'no-cache',
        'Referer': 'http://www.thetvdb.com/'
    }
def __init__(self, root_url):
    self.target_url = root_url
    server = urlparse.urlparse(root_url).netloc
    self.http_engine = HTTP.HTTP(server)
    self.myls = lswww.lswww(root_url, http_engine=self.http_engine)
    self.xmlRepGenParser = ReportGeneratorsXMLParser()
    self.xmlRepGenParser.parse(os.path.join(CONF_DIR, "config/reports/generators.xml"))
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform

    def check_update(self):
        need_update = False
        if __settings__.getSetting('plugin_name') != __plugin__:
            __settings__.setSetting('plugin_name', __plugin__)
        for libname in get_libname(self.platform):
            self.libpath = os.path.join(self.dest_path, libname)
            self.sizepath = os.path.join(self.dest_path, libname + '.size.txt')
            size = str(os.path.getsize(self.libpath))
            size_old = open(self.sizepath, "r").read()
            if size_old != size:
                need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform, libname)
            try:
                self.http = HTTP()
                self.http.fetch(url, download=dest + ".zip", progress=True)
                log("%s -> %s" % (url, dest))
                xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                xbmcvfs.delete(dest + ".zip")
            except:
                text = 'Failed download %s!' % libname
                xbmc.executebuiltin("XBMC.Notification(%s,%s,%s)" % (__plugin__, text, 750))
        return True
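# Usage sketch (assumption, not part of the original snippets): the manager is
# created with a destination directory and a platform descriptor, then update()
# is called on addon start-up; download() is what actually fetches the zipped
# binaries.  get_platform() and the temp path below are assumed helpers from
# the surrounding Kodi module.
manager = LibraryManager(xbmc.translatePath('special://temp/xbmcup/libtorrent'),
                         get_platform())
manager.update()  # calls check_update() and, if a size mismatch is found, download()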
def download(self):
    dirname = os.path.dirname(self.filename)
    zipname = os.path.basename(self.filename).replace('.db', '') + '.zip'
    url = 'http://www.tat-store.ru/torrenter/' + zipname
    self.http = HTTP()
    response = self.http.fetch(url, download=os.path.join(dirname, zipname), progress=True)
    if response.error:
        return False
    try:
        filezip = zipfile.ZipFile(os.path.join(dirname, zipname), 'r')
        filezip.extractall(dirname)
        filezip.close()
    except:
        return False
    return True
def download(self):
    xbmcvfs.mkdirs(self.dest_path)
    for libname in get_libname(self.platform):
        dest = os.path.join(self.dest_path, libname)
        log("try to fetch %s" % libname)
        url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'],
                                   self.platform['version'], libname)
        if libname != 'liblibtorrent.so':
            try:
                self.http = HTTP()
                self.http.fetch(url, download=dest + ".zip", progress=True)
                log("%s -> %s" % (url, dest))
                xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                xbmcvfs.delete(dest + ".zip")
            except:
                text = 'Failed download %s!' % libname
                xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__, text, 750, __icon__))
        else:
            xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest)
    return True
def download(self):
    xbmcvfs.mkdirs(self.dest_path)
    for libname in get_libname(self.platform):
        dest = os.path.join(self.dest_path, libname)
        log("try to fetch %s" % libname)
        url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform, libname)
        try:
            self.http = HTTP()
            self.http.fetch(url, download=dest + ".zip", progress=True)
            log("%s -> %s" % (url, dest))
            xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
            xbmcvfs.delete(dest + ".zip")
        except:
            text = 'Failed download %s!' % libname
            xbmc.executebuiltin("XBMC.Notification(%s,%s,%s)" % (__plugin__, text, 750))
    return True
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform

    def check_update(self):
        need_update = False
        if __settings__.getSetting('plugin_name') != __plugin__:
            __settings__.setSetting('plugin_name', __plugin__)
        for libname in get_libname(self.platform):
            self.libpath = os.path.join(self.dest_path, libname)
            self.sizepath = os.path.join(self.dest_path, libname + '.size.txt')
            size = str(os.path.getsize(self.libpath))
            size_old = open(self.sizepath, "r").read()
            if size_old != size:
                need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform, libname)
            try:
                self.http = HTTP()
                self.http.fetch(url, download=dest + ".zip", progress=True)
                log("%s -> %s" % (url, dest))
                xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                xbmcvfs.delete(dest + ".zip")
            except:
                text = 'Failed download %s!' % libname
                xbmc.executebuiltin("XBMC.Notification(%s,%s,%s)" % (__plugin__, text, 750))
        return True
def __init__(self):
    self.api_key = '33DBB309BB2B0ADB'
    self.cache = Cache('tvdb.db', 1.0)
    self.http = HTTP()
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
        'Cache-Control': 'no-cache',
        'Referer': 'http://www.thetvdb.com/'
    }
def attackGET(self, http_res):
    page = http_res.path
    resp_headers = http_res.headers
    referer = http_res.referer
    headers = {}
    if referer:
        headers["referer"] = referer

    url = page
    if url not in self.attackedGET:
        if self.verbose == 2:
            print(u"+ {0}".format(url))

        err1 = self.__returnErrorByCode(resp_headers["status_code"])
        if err1 != "ok":
            data1 = self.HTTP.send(url, headers=headers).getPage()
            # .htaccess protection detected
            if self.verbose >= 1:
                self.log(_("HtAccess protection found: {0}"), url)

            evil_req = HTTP.HTTPResource(url, method="ABC")
            data2, code2 = self.HTTP.send(evil_req, headers=headers).getPageCode()
            err2 = self.__returnErrorByCode(code2)
            if err2 == "ok":
                # .htaccess bypass success
                if self.verbose >= 1:
                    self.logC(_("|HTTP Code: {0} : {1}"), resp_headers["status_code"], err1)
                if self.verbose == 2:
                    self.logY(_("Source code:"))
                    self.logW(data1)

                # report xml generator (ROMULUS) not implemented for htaccess
                self.logVuln(category=Vulnerability.HTACCESS,
                             level=Vulnerability.HIGH_LEVEL,
                             request=evil_req,
                             info=_("{0} HtAccess").format(err1))
                self.logR(_(" .htaccess bypass vulnerability: {0}"), evil_req.url)

                # print output informations by verbosity option
                if self.verbose >= 1:
                    self.logC(_("|HTTP Code: {0}"), code2)
                if self.verbose == 2:
                    self.logY(_("Source code:"))
                    self.logW(data2)
        self.attackedGET.append(url)
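# Sketch of the __returnErrorByCode() helper that attackGET() above relies on.
# This is an assumption inferred from how its return value is used ("ok" means
# the page is reachable, anything else is treated as an access restriction);
# in the module it would be a private method of the attack class, and the real
# mapping may differ.
def return_error_by_code(code):
    code = int(code)
    if code == 401:
        return "Authorization Required"
    if code == 402:
        return "Payment Required"
    if code == 403:
        return "Forbidden"
    if code == 404:
        return "Not found"
    return "ok"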
class DownloaderClass():
    def __init__(self, dest_path):
        self.dest_path = dest_path
        self.platform = get_platform()
        tempdir(self.dest_path)

    def tools_download(self):
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'], libname)
            if libname != 'liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url, download=dest + ".zip", progress=True)
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__, text, 750, __icon__))
            else:
                x = xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest)
        return True
def download(self):
    xbmcvfs.mkdirs(self.dest_path)
    for libname in get_libname(self.platform):
        dest = os.path.join(self.dest_path, libname)
        log("try to fetch %s" % libname)
        url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform, libname)
        try:
            self.http = HTTP()
            self.http.fetch(url, download=dest + ".zip", progress=True)
            log("%s -> %s" % (url, dest))
            xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
            xbmcvfs.delete(dest + ".zip")
        except:
            text = 'Failed download %s!' % libname
            xbmc.executebuiltin("XBMC.Notification(%s,%s,%s)" % (__plugin__, text, 750))
    return True
def __end_element(self, name):
    if name == self.RESOURCE:
        http_res = HTTP.HTTPResource(self.path,
                                     method=self.method,
                                     encoding=self.encoding,
                                     referer=self.referer,
                                     get_params=self.get_params,
                                     post_params=self.post_params,
                                     file_params=self.file_params)
        http_res.setHeaders(self.headers)
        if self.array is self.toBrowse:
            self.toBrowse.append(http_res)
        else:
            if self.method == "GET":
                self.browsed.append(http_res)
            elif self.method == "POST":
                self.forms.append(http_res)
def download(self):
    xbmcvfs.mkdirs(self.dest_path)
    for libname in get_libname(self.platform):
        dest = os.path.join(self.dest_path, libname)
        log("try to fetch %s" % libname)
        url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform["system"],
                                   self.platform["version"], libname)
        if libname != "liblibtorrent.so":
            try:
                self.http = HTTP()
                self.http.fetch(url, download=dest + ".zip", progress=True)
                log("%s -> %s" % (url, dest))
                xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                xbmcvfs.delete(dest + ".zip")
            except:
                text = "Failed download %s!" % libname
                xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__, text, 750, __icon__))
        else:
            xbmcvfs.copy(os.path.join(self.dest_path, "libtorrent.so"), dest)
    return True
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform
        self.root = os.path.dirname(__file__)

    def check_exist(self):
        for libname in get_libname(self.platform):
            if not xbmcvfs.exists(os.path.join(self.dest_path, libname)):
                return False
        return True

    def check_update(self):
        need_update = False
        for libname in get_libname(self.platform):
            if libname != 'liblibtorrent.so':
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.root, self.platform['system'],
                                             self.platform['version'], libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                size_old = open(self.sizepath, "r").read()
                if size_old != size:
                    need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'],
                                       self.platform['version'], libname)
            if libname != 'liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url, download=dest + ".zip", progress=True)
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__, text, 750, __icon__))
            else:
                xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest)
        return True

    def android_workaround(self, new_dest_path):
        for libname in get_libname(self.platform):
            libpath = os.path.join(self.dest_path, libname)
            size = str(os.path.getsize(libpath))
            new_libpath = os.path.join(new_dest_path, libname)
            if not xbmcvfs.exists(new_libpath):
                xbmcvfs.copy(libpath, new_libpath)
                log('Copied %s -> %s' % (libpath, new_libpath))
            else:
                new_size = str(os.path.getsize(new_libpath))
                if size != new_size:
                    xbmcvfs.delete(new_libpath)
                    xbmcvfs.copy(libpath, new_libpath)
                    log('Deleted and copied (%s) %s -> (%s) %s' % (size, libpath, new_size, new_libpath))
        return new_dest_path
def attackPOST(self, form): """This method performs the file handling attack with method POST""" # copies get_params = form.get_params post_params = form.post_params file_params = form.file_params referer = form.referer err = "" for params_list in [get_params, post_params, file_params]: for i in xrange(len(params_list)): timeouted = False warn = 0 inc = 0 err500 = 0 saved_value = params_list[i][1] param_name = self.HTTP.quote(params_list[i][0]) if params_list is file_params: params_list[i][1] = ["_FILE__", params_list[i][1][1]] else: params_list[i][1] = "__FILE__" attack_pattern = HTTP.HTTPResource(form.path, method=form.method, get_params=get_params, post_params=post_params, file_params=file_params) if attack_pattern not in self.attackedPOST: self.attackedPOST.append(attack_pattern) for payload in self.payloads: payload = payload.replace('[FILE_NAME]', form.file_name) if params_list is file_params: payload = payload.replace('[VALUE]', saved_value[0]) payload = payload.replace( '[DIRVALUE]', saved_value[0].rsplit('/', 1)[0]) params_list[i][1][0] = payload else: payload = payload.replace('[VALUE]', saved_value) payload = payload.replace( '[DIRVALUE]', saved_value.rsplit('/', 1)[0]) params_list[i][1] = payload evil_req = HTTP.HTTPResource(form.path, method=form.method, get_params=get_params, post_params=post_params, file_params=file_params, referer=referer) if self.verbose == 2: print(u"+ {0}".format(evil_req)) try: data, code = self.HTTP.send(evil_req).getPageCode() except requests.exceptions.Timeout: if timeouted: continue data = "" code = "408" self.logAnom(category=Anomaly.RES_CONSUMPTION, level=Anomaly.MEDIUM_LEVEL, request=evil_req, parameter=param_name, info=Anomaly.MSG_PARAM_TIMEOUT.format( param_name)) self.logO(Anomaly.MSG_TIMEOUT, evil_req.path) self.logO(Anomaly.MSG_EVIL_REQUEST) self.logC(evil_req.http_repr) print('') timeouted = True else: err, inc, warn = self.__findPatternInResponse( data, warn) if err != "": info_msg = _( "{0} via injection in the parameter {1}") self.logVuln(category=Vulnerability.FILE_HANDLING, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter=param_name, info=info_msg.format(err, param_name)) self.logR(Vulnerability.MSG_PARAM_INJECT, err, evil_req.url, param_name) self.logR(Vulnerability.MSG_EVIL_REQUEST) self.logC(evil_req.http_repr) print('') if inc: break else: if code == "500" and err500 == 0: err500 = 1 self.logAnom(category=Anomaly.ERROR_500, level=Anomaly.HIGH_LEVEL, request=evil_req, parameter=param_name, info=Anomaly.MSG_PARAM_500.format( param_name)) self.logO(Anomaly.MSG_500, evil_req.url) self.logO(Anomaly.MSG_EVIL_REQUEST) self.logC(evil_req.http_repr) print('') params_list[i][1] = saved_value
def attackPOST(self, form): """This method performs the SQL Injection attack with method POST""" payload = "\xbf'\"(" filename_payload = "'\"(" err = "" # copies get_params = form.get_params post_params = form.post_params file_params = form.file_params referer = form.referer for params_list in [get_params, post_params, file_params]: for i in xrange(len(params_list)): saved_value = params_list[i][1] if params_list is file_params: params_list[i][1] = ["_SQL__", params_list[i][1][1]] else: params_list[i][1] = "__SQL__" param_name = self.HTTP.quote(params_list[i][0]) attack_pattern = HTTP.HTTPResource(form.path, method=form.method, get_params=get_params, post_params=post_params, file_params=file_params) if attack_pattern not in self.attackedPOST: self.attackedPOST.append(attack_pattern) if params_list is file_params: params_list[i][1][0] = filename_payload else: params_list[i][1] = payload evil_req = HTTP.HTTPResource(form.path, method=form.method, get_params=get_params, post_params=post_params, file_params=file_params, referer=referer) if self.verbose == 2: print(u"+ {0}".format(evil_req)) try: resp = self.HTTP.send(evil_req) data, code = resp.getPageCode() except requests.exceptions.Timeout, timeout: # No timeout report here... launch blind sql detection later data = "" code = "408" resp = timeout else: err = self.__findPatternInResponse(data) if err != "": self.logVuln( category=Vulnerability.SQL_INJECTION, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter=param_name, info=_("{0} via injection in the parameter {1}" ).format(err, param_name)) self.logR(Vulnerability.MSG_PARAM_INJECT, err, evil_req.url, param_name) self.logR(Vulnerability.MSG_EVIL_REQUEST) self.logC(evil_req.http_repr) print('') self.vulnerablePOST.append(attack_pattern) else: if code == "500": self.logAnom( category=Anomaly.ERROR_500, level=Anomaly.HIGH_LEVEL, request=evil_req, parameter=param_name, info=Anomaly.MSG_PARAM_500.format(param_name)) self.logO(Anomaly.MSG_500, evil_req.url) self.logO(Anomaly.MSG_EVIL_REQUEST) self.logC(evil_req.http_repr) print('') params_list[i][1] = saved_value
def attackGET(self, http_res): """This method performs the SQL Injection attack with method GET""" page = http_res.path params_list = http_res.get_params resp_headers = http_res.headers referer = http_res.referer headers = {} if referer: headers["referer"] = referer # about this payload : http://shiflett.org/blog/2006/jan/addslashes-versus-mysql-real-escape-string payload = "\xBF'\"(" vuln_found = 0 if not params_list: # Do not attack application-type files if not "content-type" in resp_headers: # Sometimes there's no content-type... so we rely on the document extension if (page.split(".")[-1] not in self.allowed) and page[-1] != "/": return elif not "text" in resp_headers["content-type"]: return err = "" payload = self.HTTP.quote(payload) url = page + "?" + payload if url not in self.attackedGET: self.attackedGET.append(url) evil_req = HTTP.HTTPResource(url) if self.verbose == 2: print(u"+ {0}".format(url)) try: resp = self.HTTP.send(evil_req, headers=headers) data, code = resp.getPageCode() except requests.exceptions.Timeout, timeout: # No timeout report here... launch blind sql detection later data = "" code = "408" err = "" resp = timeout else: err = self.__findPatternInResponse(data) if err != "": vuln_found += 1 self.logVuln( category=Vulnerability.SQL_INJECTION, level=Vulnerability.HIGH_LEVEL, request=evil_req, info=_("{0} via injection in the query string").format( err)) self.logR(Vulnerability.MSG_QS_INJECT, err, page) self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url) self.vulnerableGET.append(page + "?" + "__SQL__") else: if code == "500": self.logAnom(category=Anomaly.ERROR_500, level=Anomaly.HIGH_LEVEL, request=evil_req, info=Anomaly.MSG_QS_500) self.logO(Anomaly.MSG_500, page) self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)
class Cache:
    def __init__(self, name, version, expire=0, size=0, step=100):
        self.name = name
        self.version = version
        self._connect()
        if expire:
            self.expire(expire)
        if size:
            self.size(size, step)

    def get(self, token, callback, *param):
        cur = self.db.cursor()
        cur.execute('select expire,data from cache where id=? limit 1', (token,))
        row = cur.fetchone()
        cur.close()
        if row:
            if row[0] and row[0] < int(time.time()):
                pass
            else:
                try:
                    obj = pickle.loads(row[1])
                except:
                    pass
                else:
                    return obj

        response = callback(*param)

        if response[0]:
            obj = sqlite.Binary(pickle.dumps(response[1]))
            curtime = int(time.time())
            cur = self.db.cursor()
            if isinstance(response[0], bool):
                cur.execute('replace into cache(id,addtime,expire,data) values(?,?,?,?)',
                            (token, curtime, None, obj))
            else:
                cur.execute('replace into cache(id,addtime,expire,data) values(?,?,?,?)',
                            (token, curtime, curtime + response[0], obj))
            self.db.commit()
            cur.close()

        return response[1]

    def expire(self, expire):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache where addtime<?', (int(time.time()) - expire,))
        self.db.commit()
        cur.close()

    def size(self, size, step=100):
        # with rtrCache_lock:
        while True:
            if os.path.getsize(self.filename) < size:
                break
            cur = self.db.cursor()
            cur.execute('select id from cache order by addtime asc limit ?', (step,))
            rows = cur.fetchall()
            if not rows:
                cur.close()
                break
            cur.execute('delete from cache where id in (' + ','.join(len(rows) * '?') + ')',
                        [x[0] for x in rows])
            self.db.commit()
            cur.close()

    def flush(self):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache')
        self.db.commit()
        cur.close()

    def _connect(self):
        with rtrCache_lock:
            dirname = xbmc.translatePath('special://temp')
            for subdir in ('xbmcup', 'plugin.video.torrenter'):
                dirname = os.path.join(dirname, subdir)
                if not xbmcvfs.exists(dirname):
                    xbmcvfs.mkdir(dirname)

            self.filename = os.path.join(dirname, self.name)

            first = False
            if not xbmcvfs.exists(self.filename):
                first = True

            self.db = sqlite.connect(self.filename, check_same_thread=False)
            if not first:
                cur = self.db.cursor()
                try:
                    cur.execute('select version from db_ver')
                    row = cur.fetchone()
                    if not row or float(row[0]) != self.version:
                        cur.execute('drop table cache')
                        cur.execute('drop table if exists db_ver')
                        first = True
                except:
                    cur.execute('drop table cache')
                    first = True
                self.db.commit()
                cur.close()

            if first and not self.first_time():
                cur = self.db.cursor()
                cur.execute('pragma auto_vacuum=1')
                cur.execute('create table cache(id varchar(255) unique, addtime integer, expire integer, data blob)')
                cur.execute('create index time on cache(addtime asc)')
                cur.execute('create table db_ver(version real)')
                cur.execute('insert into db_ver(version) values(?)', (self.version,))
                self.db.commit()
                cur.close()

    def first_time(self):
        scrapers = {'tvdb': 'TheTVDB.com', 'tmdb': 'TheMovieDB.org', 'kinopoisk': 'KinoPoisk.ru'}
        ok = xbmcgui.Dialog().yesno(Localization.localize('Content Lists'),
                                    Localization.localize('Do you want to preload full metadata?') + ' (%s)' % (
                                        scrapers[os.path.basename(self.filename).split('.')[0]]),
                                    Localization.localize('It is highly recommended!'))
        if ok:
            return self.download()
        else:
            return False

    def download(self):
        dirname = os.path.dirname(self.filename)
        zipname = os.path.basename(self.filename).replace('.db', '') + '.zip'
        url = 'http://www.tat-store.ru/torrenter/' + zipname
        self.http = HTTP()
        response = self.http.fetch(url, download=os.path.join(dirname, zipname), progress=True)
        if response.error:
            return False
        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, zipname), 'r')
            filezip.extractall(dirname)
            filezip.close()
        except:
            return False
        return True
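# Usage sketch (assumption, not part of the original snippet): Cache.get() is
# keyed by a token string and filled lazily through a callback that returns a
# (timeout, value) pair -- an integer timeout caches the value for that many
# seconds, True caches it without expiry, and a falsy first element skips
# caching while still returning the value.  The db name below is made up.
def _fetch_movie(movie_id):
    # placeholder for the expensive scraper/HTTP lookup
    return 3600, {'id': movie_id, 'title': 'example'}

cache = Cache('example.db', 1.0)
movie = cache.get('movie:42', _fetch_movie, 42)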
class TvDb: """ API: scraper - скрапер search - поиск сериалов movie - профайл фильма """ def __init__(self): self.api_key = '1D62F2F90030C444' self.cache = Cache('tvdb.db', 1.0) self.http = HTTP() self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3', 'Cache-Control': 'no-cache', 'Referer': 'http://www.thetvdb.com/' } # API def scraper(self, search, year=None): try: if not isinstance(search, list): search = [search] tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8')) except: return None else: if year: tag += ':' + str(year) id = self.cache.get(tag, self._scraper, search, year) if not id: return None return self.movie(id) def search(self, name): return self._search(name) def movie(self, id): id = str(id) return self.cache.get('movie:' + id, self._movie, id) def _movie(self, id): dirname = tempfile.mkdtemp() response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip')) if response.error: self._movie_clear(dirname) return False, None try: filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r') filezip.extractall(dirname) filezip.close() movie = file(os.path.join(dirname, 'ru.xml'), 'rb').read().decode('utf8') except: self._movie_clear(dirname) return False, None self._movie_clear(dirname) body = re.compile(r'<Series>(.+?)</Series>', re.U|re.S).search(movie) if not body: return False, None body = body.group(1) res = { 'icon' : None, 'thumbnail': None, 'properties': { 'fanart_image': None, }, 'info': { 'count' : int(id) } } # режисеры и сценаристы for tag in ('Director', 'Writer'): people = {} people_list = [] [people_list.extend(x.split('|')) for x in re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U|re.S).findall(movie)] [people.update({x: 1}) for x in [x.strip() for x in people_list] if x] if people: res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x]) for tag, retag, typeof, targettype in ( ('plot', 'Overview', None, None), ('mpaa', 'ContentRating', None, None), ('premiered', 'FirstAired', None, None), ('studio', 'Network', None, None), ('title', 'SeriesName', None, None), ('runtime', 'Runtime', None, None), ('votes', 'RatingCount', None, None), ('rating', 'Rating', float, None), ('genre', 'Genre', list, unicode), ('cast', 'Actors', list, None) ): r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U|re.S).search(body) if r: r = r.group(1).strip() if typeof == float: res['info'][tag] = float(r) elif typeof == list: if targettype == unicode: res['info'][tag] = u', '.join([x for x in [x.strip() for x in r.split(u'|')] if x]) else: res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x] else: res['info'][tag] = r # год if 'premiered' in res['info']: res['info']['year'] = int(res['info']['premiered'].split('-')[0]) # постер r = re.compile(r'<poster>([^<]+)</poster>', re.U|re.S).search(body) if r: res['icon'] = 'http://thetvdb.com/banners/' + r.group(1).strip() res['thumbnail'] = 'http://thetvdb.com/banners/' + r.group(1).strip() # фанарт r = re.compile(r'<fanart>([^<]+)</fanart>', re.U|re.S).search(body) if r: res['properties']['fanart_image'] = 'http://thetvdb.com/banners/' + r.group(1).strip() timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if 'year' not in res['info'] or 
int(res['info']['year']) >= time.gmtime(time.time()).tm_year: timeout = 7*24*60*60 #week return timeout, res def _movie_clear(self, dirname): for filename in os.listdir(dirname): try: os.unlink(os.path.join(dirname, filename)) except: raise try: os.rmdir(dirname) except: raise def _search(self, search): for name in search: response = self.http.fetch('http://www.thetvdb.com/api/GetSeries.php?language=ru&seriesname=' + urllib.quote_plus(name.encode('utf8')), headers=self.headers) if response.error: return None res = [] rows = re.compile('<Series>(.+?)</Series>', re.U|re.S).findall(response.body.decode('utf8')) if rows: recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U|re.S) for row in [x for x in rows if x.find(u'<language>ru</language>') != -1]: r = recmd.search(row) if r: res.append(int(r.group(1))) # в некоторых случаях можно найти только по оригинальному названию, # но при этом русское описание есть if not res: for row in [x for x in rows if x.find(u'<language>en</language>') != -1]: r = recmd.search(row) if r: res.append(int(r.group(1))) if res: break return {'pages': (1, 0, 1, 0), 'data': res} def _scraper(self, name, year): timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if year and year >= time.gmtime(time.time()).tm_year: timeout = 7*24*60*60 #week ids = self._search(name) if ids is None: return False, None elif not ids['data']: # сохраняем пустой результат на 3-е суток return 259200, None else: return timeout, ids['data'][0]
class KinoPoisk: """ API: scraper - скрапер movie - профайл фильма search - поиск фильма best - поиск лучших фильмов person - поиск персон work - информация о работах персоны """ def __init__(self, language='ru'): dbname = 'kinopoisk.%s.db' % language self.cache = Cache(dbname, 1.0) self.html = Clear() self.timeout = 60.0 self.http = HTTP() self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3', 'Cache-Control': 'no-cache', 'Referer': 'http://www.kinopoisk.ru/level/7/' } # API def scraper(self, search, year=None): try: if not isinstance(search, list): search = [search] tag = 'scraper:' + urllib.quote_plus( ":".join(search).encode('utf8')) except: return None else: if year: tag += ':' + str(year) id = self.cache.get(tag, self._scraper, search, year) if not id: return None return self.movie(id) def movie(self, id): id = str(id) return self.cache.get('movie:' + id, self._movie, id) def search(self, search, year): return self._search_movie(search, year) def countries(self): return COUNTRIES def country(self, id, default=None): country = [x[1] for x in COUNTRIES if x[0] == id] return country[0] if country else default def _search_movie(self, search, year=None): parser = kinopoisk.pageparser.PageParser(kinopoisk.LOGGER, isDebug=True) orginalname = search[0] if len(search) > 1: name = search[1] else: name = None results = parser.fetchAndParseSearchResults(orginalname, year, name) if results and results[0][3] > 70: return results[0][0] def _scraper(self, search, year): timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if year and year > time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 * 4 # 4 week movie_id = self._search_movie(search, year) if movie_id is None: # сохраняем пустой результат на 4 week return 7 * 24 * 60 * 60 * 4, None else: return timeout, movie_id def _movie(self, id): response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/', headers=self.headers, timeout=self.timeout) if response.error: return False, None html = response.body.decode('windows-1251') res = { 'icon': None, 'thumbnail': None, 'properties': { 'fanart_image': None, }, 'info': { 'count': int(id) } } # имя, оригинальное имя, девиз, цензура, год, top250 # runtime - длительность фильма (в отдельную переменную, иначе не видно размер файла) for tag, reg, cb in ( ('title', '<title>(.+?)</title>', self.html.string), ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', self.html.string), ('tagline', '<td style="color\: #555">«(.+?)»</td></tr>', self.html.string), ('mpaa', 'images/mpaa/([^\.]+).gif', self.html.string), ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', self.html.string), ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', int), ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', int)): r = re.compile(reg, re.U).search(html) if r: value = r.group(1).strip() if value: res['info'][tag] = cb(value) # режисеры, сценаристы, жанры for tag, reg in (('director', u'<td itemprop="director">(.+?)</td>'), ( 'writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'), ('genre', u'<span itemprop="genre">(.+?)</span>')): r = re.compile(reg, re.U | re.S).search(html) if r: r2 = [] for r in re.compile('<a href="[^"]+">([^<]+)</a>', re.U).findall(r.group(1)): r = self.html.string(r) if r and r != '...': 
r2.append(r) if r2: res['info'][tag] = u', '.join(r2) # актеры r = re.compile(u'<h4>В главных ролях:</h4>(.+?)</ul>', re.U | re.S).search(html) if r: actors = [] for r in re.compile( '<li itemprop="actors"><a [^>]+>([^<]+)</a></li>', re.U).findall(r.group(1)): r = self.html.string(r) if r and r != '...': actors.append(r) if actors: res['info']['cast'] = actors[:] # res['info']['castandrole'] = actors[:] # описание фильма r = re.compile( '<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>', re.U).search(html) if r: plot = self.html.text(r.group(1).replace('<=end=>', '\n')) if plot: res['info']['plot'] = plot # IMDB r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html) if r: res['info']['rating'] = float(r.group(1).strip()) res['info']['votes'] = r.group(2).strip() # премьера r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U | re.S).search(html) if r: r = re.compile(u'data\-ical\-date="([^"]+)"', re.U | re.S).search(r.group(1)) if r: data = r.group(1).split(' ') if len(data) == 3: i = 0 for mon in (u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря'): i += 1 if mon == data[1]: mon = str(i) if len(mon) == 1: mon = '0' + mon day = data[0] if len(day) == 1: day = '0' + day res['info']['premiered'] = '-'.join( [data[2], mon, day]) break # постер r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)', re.U | re.S).search(html) if r: poster = r.group(1).replace("'", '').strip() if poster: res['thumbnail'] = res['icon'] = 'http://kinopoisk.ru' + poster menu = re.compile( '<ul id="newMenuSub" class="clearfix(.+?)<!\-\- /menu \-\->', re.U | re.S).search(html) if menu: menu = menu.group(1) # фанарт if menu.find('/film/' + id + '/wall/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/wall/', headers=self.headers, timeout=self.timeout) if not response.error: html = response.body.decode('windows-1251') fanart = re.compile( '<a href="/picture/([0-9]+)/w_size/([0-9]+)/">', re.U).findall(html) if fanart: fanart.sort(cmp=lambda (id1, size1), (id2, size2): cmp(int(size1), int(size2))) # пробуем взять максимально подходящее fanart_best = [x for x in fanart if int(x[1]) <= 1280] if fanart_best: fanart = fanart_best response = self.http.fetch( 'http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/', headers=self.headers, timeout=self.timeout) if not response.error: html = response.body.decode('windows-1251') r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html) if r: res['properties']['fanart_image'] = r.group( 1).strip() # если нет фанарта (обоев), то пробуем получить кадры if not res['properties']['fanart_image'] and menu.find( '/film/' + id + '/stills/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/stills/', headers=self.headers, timeout=self.timeout) if not response.error: html = response.body.decode('windows-1251') fanart = re.compile( '<a href="/picture/([0-9]+)/"><img src="[^<]+</a>[^<]+<b><i>([0-9]+)×([0-9]+)</i>', re.U).findall(html) if fanart: fanart.sort(cmp=lambda (id1, size1, t1), ( id2, size2, t2): cmp(int(size1), int(size2))) # пробуем взять максимально подходящее fanart_best = [ x for x in fanart if int(x[1]) <= 1280 and int(x[1]) > int(x[2]) ] if fanart_best: fanart = fanart_best response = self.http.fetch( 'http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/', headers=self.headers, timeout=self.timeout) if not response.error: html = 
response.body.decode('windows-1251') r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html) if r: res['properties']['fanart_image'] = r.group( 1).strip() # студии if menu.find('/film/' + id + '/studio/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers, timeout=self.timeout) if not response.error: html = response.body.decode('windows-1251') r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U | re.S).search(html) if r: studio = [] for r in re.compile( '<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)): r = self.html.string(r) if r: studio.append(r) if studio: res['info']['studio'] = u', '.join(studio) timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if 'year' not in res['info'] or not res['properties']['fanart_image'] \ or int(res['info']['year']) > time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 * 4 # 4 week return timeout, res
                    # No more payload injection
                    break
        # URL contains parameters
        else:
            for i in xrange(len(params_list)):
                saved_value = params_list[i][1]
                param_name = self.HTTP.quote(params_list[i][0])
                params_list[i][1] = "__XSS__"
                url = page + "?" + self.HTTP.encode(params_list)

                if url not in self.attackedGET:
                    self.attackedGET.append(url)
                    code = self.random_string()
                    params_list[i][1] = code
                    test_url = HTTP.HTTPResource(page + "?" + self.HTTP.encode(params_list))
                    self.GET_XSS[code] = (test_url, param_name)
                    try:
                        resp = self.HTTP.send(test_url, headers=headers)
                        data = resp.getPage()
                    except requests.exceptions.Timeout, timeout:
                        data = ""
                        resp = timeout
                    # is the random code on the webpage ?
                    if code in data:
                        # YES! But where exactly ?
                        payloads = self.generate_payloads(data, code)
                        for payload in payloads:
                            params_list[i][1] = payload
class TvDb: """ API: scraper - скрапер search - поиск сериалов movie - профайл фильма """ def __init__(self, language='en'): self.api_key = '33DBB309BB2B0ADB' dbname='tvdb.%s.db' % language self.cache = Cache(dbname, 1.0) self.language = language self.http = HTTP() self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3', 'Cache-Control': 'no-cache', 'Referer': 'http://www.thetvdb.com/' } # API def scraper(self, search, year=None): try: if not isinstance(search, list): search = [search] tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8')) except: return None else: if year: tag += ':' + str(year) id = self.cache.get(tag, self._scraper, search, year) if not id: return None return self.movie(id) def search(self, search, year=None): return self._search(search, year) def movie(self, id): id = str(id) return self.cache.get('movie:' + id, self._movie, id) def _movie(self, id): try: dirname = tempfile.mkdtemp() except: dirname = xbmc.translatePath('special://temp') for subdir in ('xbmcup', 'plugin.video.torrenter'): dirname = os.path.join(dirname, subdir) if not os.path.exists(dirname): os.mkdir(dirname) url = 'http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/' + self.language + '.zip' # print url response = self.http.fetch(url, headers=self.headers, download=os.path.join(dirname, 'movie.zip'), timeout=20) if response.error: print "ERRRRRROR! " + str(response.error) self._movie_clear(dirname) return False, None try: filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r') filezip.extractall(dirname) filezip.close() movie = file(os.path.join(dirname, self.language + '.xml'), 'rb').read().decode('utf8') except: self._movie_clear(dirname) return False, None self._movie_clear(dirname) body = re.compile(r'<Series>(.+?)</Series>', re.U | re.S).search(movie) if not body: return False, None body = body.group(1) res = { 'icon': None, 'thumbnail': None, 'properties': { 'fanart_image': None, }, 'info': { 'count': int(id) } } # режисеры и сценаристы for tag in ('Director', 'Writer'): people = {} people_list = [] [people_list.extend(x.split('|')) for x in re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U | re.S).findall(movie)] [people.update({x: 1}) for x in [x.strip() for x in people_list] if x] if people: res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x]) for tag, retag, typeof, targettype in ( ('plot', 'Overview', None, None), ('mpaa', 'ContentRating', None, None), ('premiered', 'FirstAired', None, None), ('studio', 'Network', None, None), ('title', 'SeriesName', None, None), ('runtime', 'Runtime', None, None), ('votes', 'RatingCount', None, None), ('rating', 'Rating', float, None), ('genre', 'Genre', list, unicode), ('cast', 'Actors', list, None) ): r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U | re.S).search(body) if r: r = r.group(1).strip() if typeof == float: res['info'][tag] = float(r) elif typeof == list: if targettype == unicode: res['info'][tag] = u', '.join([x for x in [x.strip() for x in r.split(u'|')] if x]) else: res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x] else: res['info'][tag] = r # год if 'premiered' in res['info']: res['info']['year'] = int(res['info']['premiered'].split('-')[0]) # постер r = re.compile(r'<poster>([^<]+)</poster>', re.U | re.S).search(body) if r: res['icon'] = 
'http://thetvdb.com/banners/' + r.group(1).strip() res['thumbnail'] = 'http://thetvdb.com/banners/' + r.group(1).strip() # фанарт r = re.compile(r'<fanart>([^<]+)</fanart>', re.U | re.S).search(body) if r: res['properties']['fanart_image'] = 'http://thetvdb.com/banners/' + r.group(1).strip() timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if 'year' not in res['info'] or not res['properties']['fanart_image'] \ or int(res['info']['year']) > time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 * 4 #4 week return timeout, res def _movie_clear(self, dirname): for filename in os.listdir(dirname): try: os.unlink(os.path.join(dirname, filename)) except: raise try: os.rmdir(dirname) except: raise def _search(self, search, year=None): i = -1 id = None for name in search: # print urllib.quote_plus(name.encode('utf-8')) url = 'http://www.thetvdb.com/api/GetSeries.php?language=' + self.language + '&seriesname=' + urllib.quote_plus( name.encode('utf-8')) #print url i += 1 response = self.http.fetch(url, headers=self.headers, timeout=20) #print response.body if response.error: #print "ERRRRRROR! "+str(response.error) return None res = [] rows = re.compile('<Series>(.+?)</Series>', re.U | re.S).findall(response.body.decode('utf8')) if rows: recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U | re.S) for row in [x for x in rows if x.find(u'<language>%s</language>' % self.language.decode('utf8')) != -1]: r = recmd.search(row) if r: res.append(int(r.group(1))) # в некоторых случаях можно найти только по оригинальному названию, # но при этом русское описание есть if not res and self.language != 'en': for row in [x for x in rows if x.find(u'<language>en</language>') != -1]: r = recmd.search(row) if r: res.append(int(r.group(1))) if len(res) > 1: Data = [] for id in res: for row in rows: recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U | re.S) r = recmd.search(row) if int(r.group(1)) == id: title = re.compile('<SeriesName>(.+?)</SeriesName>', re.U | re.S).search(row) Syear = re.compile('<FirstAired>(.+?)</FirstAired>', re.U | re.S).search(row) if not Syear: Syear = 0 else: Syear = Syear.group(1) Data.append((title.group(1), Syear, id)) index = get_best(Data, search, year) if index and index['rate'] > 70: id = str(index['id']) elif len(res) == 1: id = str(res[0]) if id: break return id def _scraper(self, search, year): timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if year and year > time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 * 4 # 4week id = self._search(search, year) if id is None: return 7 * 24 * 60 * 60 * 4, None else: # print str((timeout, ids['data'][0])) return timeout, id
                                 info=Anomaly.MSG_QS_500)
                    self.logO(Anomaly.MSG_500, page)
                    self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)
        else:
            for i in range(len(params_list)):
                err = ""
                param_name = self.HTTP.quote(params_list[i][0])
                saved_value = params_list[i][1]

                params_list[i][1] = "__SQL__"
                pattern_url = page + "?" + self.HTTP.encode(params_list)
                if pattern_url not in self.attackedGET:
                    self.attackedGET.append(pattern_url)

                    params_list[i][1] = self.HTTP.quote(payload)
                    url = page + "?" + self.HTTP.encode(params_list)
                    evil_req = HTTP.HTTPResource(url)

                    if self.verbose == 2:
                        print(u"+ {0}".format(evil_req.url))
                    try:
                        resp = self.HTTP.send(evil_req, headers=headers)
                        data, code = resp.getPageCode()
                    except requests.exceptions.Timeout, timeout:
                        # No timeout report here... launch blind sql detection later
                        data = ""
                        code = "408"
                        err = ""
                        resp = timeout
                    else:
                        err = self.__findPatternInResponse(data)
                    if err != "":
class KinoPoisk: """ API: scraper - скрапер movie - профайл фильма search - поиск фильма best - поиск лучших фильмов person - поиск персон work - информация о работах персоны """ def __init__(self): self.cache = Cache('kinopoisk.db', 1.0) self.html = Clear() self.http = HTTP() self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3', 'Cache-Control': 'no-cache', 'Referer': 'http://www.kinopoisk.ru/level/7/' } # API def scraper(self, search, year=None, trailer_quality=None): try: if isinstance(search, list): search = search[0] or "" tag = 'scraper:' + urllib.quote_plus(search.encode('windows-1251')) except: return None else: if year: tag += ':' + str(year) id = self.cache.get(tag, self._scraper, search, year) if not id: return None return self.movie(id, trailer_quality) def movie(self, id, trailer_quality=None): id = str(id) if trailer_quality is None: trailer_quality = 6 movie = self.cache.get('movie:' + id, self._movie, id) if not movie: return None if 'trailers' in movie and movie['trailers']: # компилируем список с нужным нам качеством video = [] for m in movie['trailers']: url = [x for x in m['video'] if x[0] <= trailer_quality] if url: m['video'] = url[-1] video.append(m) movie['trailers'] = video if movie['trailers']: # готовим главный трейлер r = [x for x in movie['trailers'] if x['trailer']] if r: movie['info']['trailer'] = r[0]['video'][1] else: # если трейлер не найден, то отдаем что попало... movie['info']['trailer'] = movie['trailers'][0]['video'][1] return movie def search(self, name, trailer_quality=None): return self._search_movie(name) def best(self, **kwarg): page = kwarg.get('page', 1) limit = kwarg.get('limit', 50) url = 'http://www.kinopoisk.ru/top/navigator/m_act%5Bis_film%5D/on/m_act%5Bnum_vote%5D/' + str( kwarg.get('votes', 100)) + '/' if kwarg.get('dvd'): url += 'm_act%5Bis_dvd%5D/on/' if kwarg.get('decade'): url += 'm_act%5Bdecade%5D/' + str(kwarg['decade']) + '/' if kwarg.get('genre'): url += 'm_act%5Bgenre%5D/' + str(GENRE[kwarg['genre']]) + '/' if kwarg.get('country'): url += 'm_act%5Bcountry%5D/' + str(kwarg['country']) + '/' if kwarg.get('rate'): url += 'm_act%5Brating%5D/' + str(kwarg['rate']) + ':/' if kwarg.get('mpaa'): url += 'm_act%5Bmpaa%5D/' + str(kwarg['mpaa']) + '/' url += 'perpage/' + str(limit) + '/order/ex_rating/' if page > 1: url += 'page/' + str(page) + '/' response = self.http.fetch(url, headers=self.headers) if response.error: return None res = {'pages': (1, 0, 1, 0), 'data': []} r = re.compile('<div class="pagesFromTo(.+?)<div class="pagesFromTo', re.U | re.S).search( response.body.decode('windows-1251')) if r: body = r.group(1) # compile pagelist p = re.compile('>([0-9]+)—[0-9]+[^0-9]+?([0-9]+)', re.U).search(body) if p: page = (int(p.group(1)) - 1) / limit + 1 total = int(p.group(2)) pages = total / limit if limit * pages != total: pages += 1 res['pages'] = (pages, 0 if page == 1 else page - 1, page, 0 if page == pages else page + 1) # end compile for id in re.compile('<div id="tr_([0-9]+)"', re.U | re.S).findall(body): res['data'].append(int(id)) return res def person(self, name): response = self.http.fetch( 'http://www.kinopoisk.ru/s/type/people/list/1/find/' + urllib.quote_plus(name.encode('windows-1251')) + '/order/relevant/', headers=self.headers) if response.error: return None res = [] body = re.compile( '<div class="navigator">(.+?)<div 
class="navigator">', re.U | re.S).search(response.body.decode('windows-1251')) if body: for block in re.compile('<p class="pic">(.+?)<div class="clear">', re.U | re.S).findall(body.group(1)): id, name, original, year, poster = None, None, None, None, None r = re.compile( '<p class="name"><a href="http://www\.kinopoisk\.ru/level/4/people/([0-9]+)[^>]+>([^<]+)</a>', re.U | re.S).search(block) if r: id = r.group(1) name = r.group(2).strip() if id and name: r = re.compile('<span class="gray">([^<]+)</span>', re.U | re.S).search(block) if r: original = r.group(1).strip() if not original: original = None r = re.compile('<span class="year">([0-9]{4})</span>', re.U | re.S).search(block) if r: year = int(r.group(1)) if block.find('no-poster.gif') == -1: poster = 'http://st.kinopoisk.ru/images/actor/' + id + '.jpg' res.append({ 'id': int(id), 'name': name, 'originalname': original, 'year': year, 'poster': poster }) return {'pages': (1, 0, 1, 0), 'data': res} def work(self, id): response = self.http.fetch('http://www.kinopoisk.ru/name/' + str(id) + '/', headers=self.headers) if response.error: return None res = {} r = re.compile('id="sort_block">(.+?)<style>', re.U | re.S).search( response.body.decode('windows-1251')) if r: for block in r.group(1).split( u'<table cellspacing="0" cellpadding="0" border="0" width="100%">' ): work = None for w in ('actor', 'director', 'writer', 'producer', 'producer_ussr', 'composer', 'operator', 'editor', 'design', 'voice', 'voice_director'): if block.find(u'id="' + w + u'"') != -1: work = 'producer' if w == 'producer_ussr' else w break if work: movies = [] for id, name in re.compile( '<span class="name"><a href="/film/([0-9]+)/" >([^<]+?)</a>', re.U).findall(block): for tag in (u'(мини-сериал)', u'(сериал)'): if name.find(tag) != -1: break else: movies.append(int(id)) if movies: res.setdefault(work, []).extend(movies) return res def review(self, id, query): query_s = 'all' if query == 'stat' else query data = self.cache.get('review:' + str(id) + ':' + query_s, self._review, id, query_s) if not data: return data return data[query] def countries(self): return COUNTRIES def country(self, id, default=None): country = [x[1] for x in COUNTRIES if x[0] == id] return country[0] if country else default # PRIVATE def _search_movie(self, name, year=None): url = 'http://www.kinopoisk.ru/s/type/film/list/1/find/' + urllib.quote_plus( name.encode('windows-1251')) # + '/order/relevant' if year: url += '/m_act%5Byear%5D/' + str(year) url += '/m_act%5Btype%5D/film/' response = self.http.fetch(url, headers=self.headers) if response.error: return None res = [] r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search( response.body.decode('windows-1251')) if r: for id in re.compile( '<p class="name"><a href="/level/1/film/([0-9]+)', re.U | re.S).findall(r.group(1)): res.append(int(id)) return {'pages': (1, 0, 1, 0), 'data': res} def _scraper(self, name, year): timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if year and year >= time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 #week ids = self._search_movie(name, year) if ids is None: return False, None elif not ids['data']: # сохраняем пустой результат на 3-е суток return 259200, None else: return timeout, ids['data'][0] def _review(self, id, query): url = 'http://www.kinopoisk.ru/film/' + str(id) + '/ord/rating/' if query in ('good', 'bad', 'neutral'): url += 'status/' + query + '/' url += 'perpage/200/' response = self.http.fetch(url, 
headers=self.headers) if response.error: return False, None html = response.body.decode('windows-1251') res = { 'stat': { 'all': 0, 'good': 0, 'bad': 0, 'neutral': 0 }, query: [] } r = re.compile('<ul class="resp_type">(.+?)</ul>', re.U | re.S).search(html) if r: ul = r.group(1) for q, t in (('pos', 'good'), ('neg', 'bad'), ('neut', 'neutral')): r = re.compile( '<li class="' + q + '"><a href="[^>]+>[^<]+</a><b>([0-9]+)</b></li>', re.U).search(ul) if r: res['stat'][t] = int(r.group(1)) res['stat']['all'] = res['stat']['good'] + res['stat'][ 'bad'] + res['stat']['neutral'] r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search(html) if r: for block in r.group(1).split('itemprop="reviews"'): review = { 'nick': None, 'count': None, 'title': None, 'review': None, 'time': None } r = re.compile('itemprop="reviewBody">(.+?)</div>', re.U | re.S).search(block) if r: text = r.group(1) for tag1, tag2 in ((u'<=end=>', u'\n'), (u'<b>', u'[B]'), (u'</b>', u'[/B]'), (u'<i>', u'[I]'), (u'</i>', u'[/I]'), (u'<u>', u'[U]'), (u'</u>', u'[/U]')): text = text.replace(tag1, tag2) r = self.html.text(text) if r: review['review'] = r user = None r = re.compile( '<p class="profile_name"><s></s><a href="[^>]+>([^<]+)</a></p>' ).search(block) if r: user = self.html.string(r.group(1)) else: r = re.compile('<p class="profile_name"><s></s>([^<]+)</p>' ).search(block) if r: user = self.html.string(r.group(1)) if user: review['nick'] = user r = re.compile('<p class="sub_title"[^>]+>([^<]+)</p>').search( block) if r: title = self.html.string(r.group(1)) if title: review['title'] = title r = re.compile('<span class="date">([^<]+)</span>', re.U | re.S).search(block) if r: review['time'] = r.group(1).replace(u' |', u',') r = re.compile(u'<a href="[^>]+>рецензии \(([0-9]+)\)</a>', re.U | re.S).search(block) if r: review['count'] = int(r.group(1)) if review['nick'] and review['review']: res[query].append(review) return 3600, res # one hour def _movie(self, id): response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/', headers=self.headers) if response.error: return False, None html = response.body.decode('windows-1251') res = { 'icon': None, 'thumbnail': None, 'info': { 'count': int(id) }, 'properties': { 'fanart_image': None, }, } # имя, оригинальное имя, девиз, цензура, год, top250 # runtime - длительность фильма (в отдельную переменную, иначе не видно размер файла) for tag, reg, t in ( ('title', '<title>(.+?)</title>', 'str'), ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', 'str'), ('tagline', '<td style="color\: #555">«(.+?)»</td></tr>', 'str'), ('mpaa', 'itemprop="contentRating"\s+content="MPAA\s+([^"]+)"', 'str'), ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', 'str'), ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', 'int'), ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', 'int')): r = re.compile(reg, re.U).search(html) if r: value = r.group(1).strip() if value: res['info'][tag] = value if t == 'int': res['info'][tag] = int(res['info'][tag]) else: res['info'][tag] = self.html.string(res['info'][tag]) # режисеры, сценаристы, жанры for tag, reg in (('director', u'<td itemprop="director">(.+?)</td>'), ( 'writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'), ('genre', u'<td itemprop="genre">(.+?)</td>')): r = re.compile(reg, re.U | re.S).search(html) if r: r2 = [] for r in re.compile('<a href="[^"]+">([^<]+)</a>', re.U).findall(r.group(1)): r = self.html.string(r) if r and r != '...': 
r2.append(r) if r2: res['info'][tag] = u', '.join(r2) # актеры r = re.compile(u'<h4>В главных ролях:</h4><ul>(.+?)</ul>', re.U | re.S).search(html) if r: actors = [] for r in re.compile( '<li itemprop="actors"><a [^>]+>([^<]+)</a></li>', re.U).findall(r.group(1)): r = self.html.string(r) if r and r != '...': actors.append(r) if actors: res['info']['cast'] = actors[:] #res['info']['castandrole'] = actors[:] # описание фильма r = re.compile( '<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>', re.U).search(html) if r: plot = self.html.text(r.group(1).replace('<=end=>', '\n')) if plot: res['info']['plot'] = plot # IMDB r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html) if r: res['info']['rating'] = float(r.group(1).strip()) res['info']['votes'] = r.group(2).strip() # # премьера # r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U|re.S).search(html) # if r: # r = re.compile(u'data\-ical\-date="([^"]+)"', re.U|re.S).search(r.group(1)) # if r: # data = r.group(1).split(' ') # if len(data) == 3: # i = 0 # for mon in (u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря'): # i += 1 # if mon == data[1]: # mon = str(i) # if len(mon) == 1: # mon = '0' + mon # day = data[0] # if len(day) == 1: # day = '0' + day # res['info']['premiered'] = '-'.join([data[2], mon, day]) # break # постер r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)', re.U | re.S).search(html) if r: poster = r.group(1).replace("'", '').strip() if poster: if poster.startswith("/"): poster = "http://www.kinopoisk.ru%s" % poster res['icon'] = poster res['thumbnail'] = poster menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)</ul>', re.U | re.S).search(html) if menu: menu = menu.group(1) # фанарт if menu.find('/film/' + id + '/wall/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/wall/', headers=self.headers) if not response.error: html = response.body.decode('windows-1251') fanart = re.compile( '<a href="/picture/([0-9]+)/w_size/([0-9]+)/">', re.U).findall(html) if fanart: fanart.sort(cmp=lambda (id1, size1), (id2, size2): cmp(int(size1), int(size2))) # пробуем взять максимально подходящее fanart_best = [x for x in fanart if int(x[1]) <= 1280] if fanart_best: fanart = fanart_best response = self.http.fetch( 'http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/', headers=self.headers) if not response.error: html = response.body.decode('windows-1251') r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html) if r: res['properties']['fanart_image'] = r.group( 1).strip() # если нет фанарта (обоев), то пробуем получить кадры if not res['properties']['fanart_image'] and menu.find( '/film/' + id + '/stills/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/stills/', headers=self.headers) if not response.error: html = response.body.decode('windows-1251') fanart = re.compile( '<a href="/picture/([0-9]+)/"><img src="[^<]+</a>[^<]+<b><i>([0-9]+)×([0-9]+)</i>', re.U).findall(html) if fanart: fanart.sort(cmp=lambda (id1, size1, t1), ( id2, size2, t2): cmp(int(size1), int(size2))) # пробуем взять максимально подходящее fanart_best = [ x for x in fanart if int(x[1]) <= 1280 and int(x[1]) > int(x[2]) ] if fanart_best: fanart = fanart_best response = self.http.fetch( 'http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/', headers=self.headers) if not response.error: html = 
response.body.decode('windows-1251') r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html) if r: res['properties']['fanart_image'] = r.group( 1).strip() # # студии # if menu.find('/film/' + id + '/studio/') != -1: # response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers) # if not response.error: # html = response.body.decode('windows-1251') # r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U|re.S).search(html) # if r: # studio = [] # for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)): # r = self.html.string(r) # if r: # studio.append(r) # if studio: # res['info']['studio'] = u', '.join(studio) # трэйлеры # trailers1 = [] # русские трейлеры # trailers2 = [] # другие русские видео # trailers3 = [] # трейлеры # trailers4 = [] # другие видео # if menu.find('/film/' + id + '/video/') != -1: # response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/video/', headers=self.headers) # if not response.error: # html = response.body.decode('windows-1251') # for row in re.compile(u'<!-- ролик -->(.+?)<!-- /ролик -->', re.U|re.S).findall(html): # # отсекаем лишние блоки # if row.find(u'>СМОТРЕТЬ</a>') != -1: # # русский ролик? # if row.find('class="flag flag2"') == -1: # is_ru = False # else: # is_ru = True # # получаем имя трейлера # r = re.compile('<a href="/film/' + id + '/video/[0-9]+/[^>]+ class="all">(.+?)</a>', re.U).search(row) # if r: # name = self.html.string(r.group(1)) # if name: # trailer = { # 'name': name, # 'time': None, # 'trailer': False, # 'ru': is_ru, # 'video': [] # } # # трейлер или тизер? # for token in (u'Трейлер', u'трейлер', u'Тизер', u'тизер'): # if name.find(token) != -1: # trailer['trailer'] = True # break # # получаем время трейлера # r = re.compile(u'clock.gif"[^>]+></td>\s*<td style="color\: #777">[^0-9]*([0-9\:]+)</td>', re.U|re.S).search(row) # if r: # trailer['time'] = r.group(1).strip() # # делим ролики по качеству # for r in re.compile('trailer/([1-3])a.gif"(.+?)link=([^"]+)" class="continue">.+?<td style="color\:#777">([^<]+)</td>\s*</tr>', re.U|re.S).findall(row): # quality = int(r[0]) # if r[1].find('icon-hd') != -1: # quality += 3 # trailer['video'].append((quality, r[2].strip(), r[3])) # if trailer['video']: # if trailer['ru']: # if trailer['trailer']: # trailers1.append(trailer) # else: # trailers2.append(trailer) # else: # if trailer['trailer']: # trailers3.append(trailer) # else: # trailers4.append(trailer) # # склеиваем трейлеры # res['trailers'].extend(trailers1) # res['trailers'].extend(trailers2) # res['trailers'].extend(trailers3) # res['trailers'].extend(trailers4) timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if 'year' not in res['info'] or int( res['info']['year']) >= time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 #week return timeout, res
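
# --- Hedged sketch (not part of the original module) -----------------------------------
# The private scraper methods above all end by returning a (timeout, data) pair for the
# cache layer: True means "cache without expiry", an integer means "cache for that many
# seconds".  A minimal standalone illustration of the freshness rule used here, under
# that assumed convention:
import time

def cache_timeout_for(year, week=7 * 24 * 60 * 60):
    # A movie from the current (or an unknown) year may still change on the site,
    # so cache it only for a week; older entries are cached indefinitely.
    if year is None or int(year) >= time.gmtime(time.time()).tm_year:
        return week
    return True

# Example: cache_timeout_for(2005) -> True; cache_timeout_for(time.gmtime().tm_year) -> 604800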
class TvDb: """ API: scraper - скрапер search - поиск сериалов movie - профайл фильма """ def __init__(self): self.api_key = '33DBB309BB2B0ADB' self.cache = Cache('tvdb.db', 1.0) self.http = HTTP() self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3', 'Cache-Control': 'no-cache', 'Referer': 'http://www.thetvdb.com/' } # API def scraper(self, search, year=None, season=None): try: if not isinstance(search, list): search = [search] tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8')) except: return None else: if year: tag += ':' + str(year) id = self.cache.get(tag, self._scraper, search, year) if not id: return None if season: return self.get_banners(id) return self.movie(id) def get_banners(self, id): import xml.etree.ElementTree as ET dirname = tempfile.mkdtemp() response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + str(id) + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip')) if response.error: self._movie_clear(dirname) return False, None try: filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r') filezip.extractall(dirname) filezip.close() movie = file(os.path.join(dirname, 'banners.xml'), 'rb').read().decode('utf8') except: self._movie_clear(dirname) return False, None self._movie_clear(dirname) dom = ET.fromstring(movie) if not len(dom): return def dom2dict(node): ret = {} for child in node: if len(child): ret.setdefault(child.tag.lower(), []).append(dom2dict(child)) else: ret[child.tag.lower()] = child.text return ret def update_image_urls(meta): if isinstance(meta, dict): for k, v in meta.items(): if isinstance(v, list): map(update_image_urls, v) elif isinstance(v, dict): update_image_urls(v) elif k in ["banner", "fanart", "poster", "filename", "bannerpath", "vignettepath", "thumbnailpath"] and isinstance(v, basestring): meta[k] = image_url(v) return meta def image_url(fragment): return "%s/banners/%s" % ("http://www.thetvdb.com", fragment) return update_image_urls(dom2dict(dom))["banner"] def search(self, name): return self._search(name) def movie(self, id): id = str(id) return self.cache.get('movie:' + id, self._movie, id) def _movie(self, id): dirname = tempfile.mkdtemp() response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip')) if response.error: self._movie_clear(dirname) return False, None try: filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r') filezip.extractall(dirname) filezip.close() movie = file(os.path.join(dirname, 'ru.xml'), 'rb').read().decode('utf8') except: self._movie_clear(dirname) return False, None self._movie_clear(dirname) body = re.compile(r'<Series>(.+?)</Series>', re.U|re.S).search(movie) if not body: return False, None body = body.group(1) res = { 'icon' : None, 'thumbnail': None, 'properties': { 'fanart_image': None, }, 'info': { 'count' : int(id) } } # режисеры и сценаристы for tag in ('Director', 'Writer'): people = {} people_list = [] [people_list.extend(x.split('|')) for x in re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U|re.S).findall(movie)] [people.update({x: 1}) for x in [x.strip() for x in people_list] if x] if people: res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x]) for tag, retag, typeof, targettype in ( ('plot', 
'Overview', None, None), ('mpaa', 'ContentRating', None, None), ('premiered', 'FirstAired', None, None), ('studio', 'Network', None, None), ('title', 'SeriesName', None, None), ('runtime', 'Runtime', None, None), ('votes', 'RatingCount', None, None), ('rating', 'Rating', float, None), ('genre', 'Genre', list, unicode), ('cast', 'Actors', list, None) ): r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U|re.S).search(body) if r: r = r.group(1).strip() if typeof == float: res['info'][tag] = float(r) elif typeof == list: if targettype == unicode: res['info'][tag] = u', '.join([x for x in [x.strip() for x in r.split(u'|')] if x]) else: res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x] else: res['info'][tag] = r # год if 'premiered' in res['info']: res['info']['year'] = int(res['info']['premiered'].split('-')[0]) # постер r = re.compile(r'<poster>([^<]+)</poster>', re.U|re.S).search(body) if r: res['icon'] = 'http://thetvdb.com/banners/' + r.group(1).strip() res['thumbnail'] = 'http://thetvdb.com/banners/' + r.group(1).strip() # фанарт r = re.compile(r'<fanart>([^<]+)</fanart>', re.U|re.S).search(body) if r: res['properties']['fanart_image'] = 'http://thetvdb.com/banners/' + r.group(1).strip() timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year: timeout = 7*24*60*60 #week return timeout, res def _movie_clear(self, dirname): for filename in os.listdir(dirname): try: os.unlink(os.path.join(dirname, filename)) except: raise try: os.rmdir(dirname) except: raise def _search(self, search): i=-1 for name in search: i+=1 response = self.http.fetch('http://www.thetvdb.com/api/GetSeries.php?language=ru&seriesname=' + urllib.quote_plus(name.encode('utf-8','ignore')), headers=self.headers) if response.error: return None res = [] rows = re.compile('<Series>(.+?)</Series>', re.U|re.S).findall(response.body.decode('utf8')) if rows: recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U|re.S) for row in [x for x in rows if x.find(u'<language>ru</language>') != -1]: r = recmd.search(row) if r: res.append(int(r.group(1))) # в некоторых случаях можно найти только по оригинальному названию, # но при этом русское описание есть if not res: for row in [x for x in rows if x.find(u'<language>en</language>') != -1]: r = recmd.search(row) if r: res.append(int(r.group(1))) if res: break return {'pages': (1, 0, 1, 0), 'data': res} def _scraper(self, name, year): timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if year and year >= time.gmtime(time.time()).tm_year: timeout = 7*24*60*60 #week ids = self._search(name) if ids is None: return False, None elif not ids['data']: # сохраняем пустой результат на 3-е суток return 259200, None else: return timeout, ids['data'][0]
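
# --- Hedged sketch (illustration only) --------------------------------------------------
# How the nested-dict conversion inside TvDb.get_banners() treats the banners.xml payload.
# A trimmed standalone version of the same idea; the XML snippet below is hypothetical:
import xml.etree.ElementTree as ET

def dom2dict(node):
    # Leaf elements become key/value pairs; repeated container elements are collected
    # into lists under a lowercased tag name.
    ret = {}
    for child in node:
        if len(child):
            ret.setdefault(child.tag.lower(), []).append(dom2dict(child))
        else:
            ret[child.tag.lower()] = child.text
    return ret

sample = ET.fromstring(
    "<Banners><Banner><BannerPath>fanart/original/1.jpg</BannerPath>"
    "<BannerType>fanart</BannerType></Banner></Banners>")
# -> {'banner': [{'bannerpath': 'fanart/original/1.jpg', 'bannertype': 'fanart'}]}
print(dom2dict(sample))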
class KinoPoisk: """ API: scraper - скрапер movie - профайл фильма search - поиск фильма best - поиск лучших фильмов person - поиск персон work - информация о работах персоны """ def __init__(self, language='ru'): dbname = 'kinopoisk.%s.db' % language self.cache = Cache(dbname, 1.0) self.html = Clear() self.timeout = 60.0 self.http = HTTP() self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3', 'Cache-Control': 'no-cache', 'Referer': 'http://www.kinopoisk.ru/level/7/' } # API def scraper(self, search, year=None): try: if not isinstance(search, list): search = [search] tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8')) except: return None else: if year: tag += ':' + str(year) id = self.cache.get(tag, self._scraper, search, year) if not id: return None return self.movie(id) def movie(self, id): id = str(id) return self.cache.get('movie:' + id, self._movie, id) def search(self, search, year): return self._search_movie(search, year) def countries(self): return COUNTRIES def country(self, id, default=None): country = [x[1] for x in COUNTRIES if x[0] == id] return country[0] if country else default def _search_movie(self, search, year=None): parser = kinopoisk.pageparser.PageParser(kinopoisk.LOGGER, isDebug=True) orginalname = search[0] if len(search) > 1: name = search[1] else: name = None results = parser.fetchAndParseSearchResults(orginalname, year, name) if results and results[0][3] > 70: return results[0][0] def _scraper(self, search, year): timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if year and year > time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 * 4 # 4 week movie_id = self._search_movie(search, year) if movie_id is None: # сохраняем пустой результат на 4 week return 7 * 24 * 60 * 60 * 4, None else: return timeout, movie_id def _movie(self, id): response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/', headers=self.headers, timeout=self.timeout) if response.error: return False, None html = response.body.decode('windows-1251') res = { 'icon': None, 'thumbnail': None, 'properties': { 'fanart_image': None, }, 'info': { 'count': int(id) } } # имя, оригинальное имя, девиз, цензура, год, top250 # runtime - длительность фильма (в отдельную переменную, иначе не видно размер файла) for tag, reg, cb in ( ('title', '<title>(.+?)</title>', self.html.string), ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', self.html.string), ('tagline', '<td style="color\: #555">«(.+?)»</td></tr>', self.html.string), ('mpaa', 'images/mpaa/([^\.]+).gif', self.html.string), ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', self.html.string), ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', int), ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', int) ): r = re.compile(reg, re.U).search(html) if r: value = r.group(1).strip() if value: res['info'][tag] = cb(value) # режисеры, сценаристы, жанры for tag, reg in ( ('director', u'<td itemprop="director">(.+?)</td>'), ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'), ('genre', u'<span itemprop="genre">(.+?)</span>') ): r = re.compile(reg, re.U | re.S).search(html) if r: r2 = [] for r in re.compile('<a href="[^"]+">([^<]+)</a>', re.U).findall(r.group(1)): r = self.html.string(r) if r and r != '...': 
r2.append(r) if r2: res['info'][tag] = u', '.join(r2) # актеры r = re.compile(u'<h4>В главных ролях:</h4>(.+?)</ul>', re.U | re.S).search(html) if r: actors = [] for r in re.compile('<li itemprop="actors"><a [^>]+>([^<]+)</a></li>', re.U).findall(r.group(1)): r = self.html.string(r) if r and r != '...': actors.append(r) if actors: res['info']['cast'] = actors[:] # res['info']['castandrole'] = actors[:] # описание фильма r = re.compile('<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>', re.U).search(html) if r: plot = self.html.text(r.group(1).replace('<=end=>', '\n')) if plot: res['info']['plot'] = plot # IMDB r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html) if r: res['info']['rating'] = float(r.group(1).strip()) res['info']['votes'] = r.group(2).strip() # премьера r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U | re.S).search(html) if r: r = re.compile(u'data\-ical\-date="([^"]+)"', re.U | re.S).search(r.group(1)) if r: data = r.group(1).split(' ') if len(data) == 3: i = 0 for mon in ( u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря'): i += 1 if mon == data[1]: mon = str(i) if len(mon) == 1: mon = '0' + mon day = data[0] if len(day) == 1: day = '0' + day res['info']['premiered'] = '-'.join([data[2], mon, day]) break # постер r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)', re.U | re.S).search(html) if r: poster = r.group(1).replace("'", '').strip() if poster: res['thumbnail'] = res['icon'] = 'http://kinopoisk.ru' + poster menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)<!\-\- /menu \-\->', re.U | re.S).search(html) if menu: menu = menu.group(1) # фанарт if menu.find('/film/' + id + '/wall/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/wall/', headers=self.headers, timeout=self.timeout) if not response.error: html = response.body.decode('windows-1251') fanart = re.compile('<a href="/picture/([0-9]+)/w_size/([0-9]+)/">', re.U).findall(html) if fanart: fanart.sort(cmp=lambda (id1, size1), (id2, size2): cmp(int(size1), int(size2))) # пробуем взять максимально подходящее fanart_best = [x for x in fanart if int(x[1]) <= 1280] if fanart_best: fanart = fanart_best response = self.http.fetch( 'http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/', headers=self.headers, timeout=self.timeout) if not response.error: html = response.body.decode('windows-1251') r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html) if r: res['properties']['fanart_image'] = r.group(1).strip() # если нет фанарта (обоев), то пробуем получить кадры if not res['properties']['fanart_image'] and menu.find('/film/' + id + '/stills/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/stills/', headers=self.headers, timeout=self.timeout) if not response.error: html = response.body.decode('windows-1251') fanart = re.compile( '<a href="/picture/([0-9]+)/"><img src="[^<]+</a>[^<]+<b><i>([0-9]+)×([0-9]+)</i>', re.U).findall(html) if fanart: fanart.sort(cmp=lambda (id1, size1, t1), (id2, size2, t2): cmp(int(size1), int(size2))) # пробуем взять максимально подходящее fanart_best = [x for x in fanart if int(x[1]) <= 1280 and int(x[1]) > int(x[2])] if fanart_best: fanart = fanart_best response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/', headers=self.headers, timeout=self.timeout) if not response.error: html = 
response.body.decode('windows-1251') r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html) if r: res['properties']['fanart_image'] = r.group(1).strip() # студии if menu.find('/film/' + id + '/studio/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers, timeout=self.timeout) if not response.error: html = response.body.decode('windows-1251') r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U | re.S).search(html) if r: studio = [] for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)): r = self.html.string(r) if r: studio.append(r) if studio: res['info']['studio'] = u', '.join(studio) timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if 'year' not in res['info'] or not res['properties']['fanart_image'] \ or int(res['info']['year']) > time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 * 4 # 4 week return timeout, res
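
# --- Hedged sketch (illustration only) --------------------------------------------------
# The fanart blocks above pick a wallpaper by sorting the scraped (picture_id, width)
# pairs and preferring the largest image that is still <= 1280 px wide, falling back to
# the largest available otherwise.  A standalone rendering of that selection rule with
# made-up candidate data:
def pick_fanart(candidates, max_width=1280):
    # candidates: list of (picture_id, width-as-string) tuples scraped from the wall page
    candidates = sorted(candidates, key=lambda c: int(c[1]))
    fitting = [c for c in candidates if int(c[1]) <= max_width]
    pool = fitting if fitting else candidates
    return pool[-1] if pool else None

# Example: prefers ('77', '1280') over the larger-than-limit ('12', '1920')
print(pick_fanart([('12', '1920'), ('77', '1280'), ('3', '800')]))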
def attackPOST(self, form): """This method performs the cross site scripting attack (XSS attack) with method POST""" page = form.url referer = form.referer headers = {} if referer: headers["referer"] = referer if page not in self.PHP_SELF: evil_req = None if page.endswith("/"): evil_req = HTTP.HTTPResource(page + self.php_self_payload) elif page.endswith(".php"): evil_req = HTTP.HTTPResource(page + "/" + self.php_self_payload) if evil_req: if self.verbose == 2: print(u"+ {0}".format(evil_req.url)) data, http_code = self.HTTP.send( evil_req, headers=headers).getPageCode() if self._validXSSContentType( evil_req) and self.php_self_check in data: self.logR(Vulnerability.MSG_PATH_INJECT, self.MSG_VULN, page) self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url) self.logVuln( category=Vulnerability.XSS, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter="PHP_SELF", info= _("XSS vulnerability found via injection in the resource path" )) self.PHP_SELF.append(page) # copies get_params = form.get_params post_params = form.post_params file_params = form.file_params for params_list in [get_params, post_params, file_params]: for i in xrange(len(params_list)): param_name = self.HTTP.quote(params_list[i][0]) saved_value = params_list[i][1] if params_list is file_params: params_list[i][1] = ["_XSS__", params_list[i][1][1]] else: params_list[i][1] = "__XSS__" # We keep an attack pattern to be sure a given form won't be attacked on the same field several times attack_pattern = HTTP.HTTPResource(form.path, method=form.method, get_params=get_params, post_params=post_params, file_params=file_params) if not attack_pattern in self.attackedPOST: self.attackedPOST.append(attack_pattern) code = self.random_string() if params_list is file_params: params_list[i][1][0] = code else: params_list[i][1] = code # will only memorize the last used payload (working or not) but the code will always be the good test_payload = HTTP.HTTPResource(form.path, method=form.method, get_params=get_params, post_params=post_params, file_params=file_params, referer=referer) self.POST_XSS[code] = (test_payload, param_name) try: resp = self.HTTP.send(test_payload) data = resp.getPage() except requests.exceptions.Timeout, timeout: data = "" resp = timeout # rapid search on the code to check injection if code in data: # found, now study where the payload is injected and how to exploit it payloads = self.generate_payloads(data, code) for payload in payloads: if params_list is file_params: params_list[i][1][0] = payload else: params_list[i][1] = payload evil_req = HTTP.HTTPResource( form.path, method=form.method, get_params=get_params, post_params=post_params, file_params=file_params, referer=referer) if self.verbose == 2: print(u"+ {0}".format(evil_req)) try: resp = self.HTTP.send(evil_req) dat = resp.getPage() except requests.exceptions.Timeout, timeout: dat = "" resp = timeout if self._validXSSContentType( evil_req ) and dat is not None and len(dat) > 1: if payload.lower() in dat.lower(): self.SUCCESSFUL_XSS[code] = payload self.logVuln( category=Vulnerability.XSS, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter=param_name, info=_( "XSS vulnerability found via injection" " in the parameter {0}").format( param_name)) self.logR(Vulnerability.MSG_PARAM_INJECT, self.MSG_VULN, evil_req.url, param_name) self.logR(Vulnerability.MSG_EVIL_REQUEST) self.logC(evil_req.http_repr) print('') # Stop injecting payloads and move to the next parameter break # restore the saved parameter in the list params_list[i][1] = saved_value
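
# --- Hedged sketch (not taken from wapiti itself) ---------------------------------------
# The POST XSS logic above first plants a random marker in each form field, checks whether
# the marker is reflected in the response, and only then tries real payloads against that
# field.  A minimal stand-in for that probe-then-attack flow; send() is a hypothetical
# helper, not part of the original code:
import random
import string

def random_string(length=10):
    # Same idea as the module's random_string(): an unlikely-to-occur marker.
    return "w" + "".join(random.choice(string.ascii_lowercase) for _ in range(length))

def probe_then_attack(send, payloads):
    # send(value) -> response body for a request with the field set to `value` (assumed helper)
    code = random_string()
    if code not in send(code):          # marker not reflected: field is not injectable
        return None
    for payload in payloads:            # marker reflected: try real payloads
        if payload.lower() in send(payload).lower():
            return payload              # payload survived output encoding -> vulnerable
    return None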
class Cache: def __init__(self, name, version, expire=0, size=0, step=100): self.name = name self.version = version self._connect() if expire: self.expire(expire) if size: self.size(size, step) def get(self, token, callback, *param): cur = self.db.cursor() cur.execute('select expire,data from cache where id=? limit 1', (token, )) row = cur.fetchone() cur.close() if row: if row[0] and row[0] < int(time.time()): pass else: try: obj = pickle.loads(row[1]) except: pass else: return obj response = callback(*param) if response[0]: obj = sqlite.Binary(pickle.dumps(response[1])) curtime = int(time.time()) cur = self.db.cursor() if isinstance(response[0], bool): cur.execute( 'replace into cache(id,addtime,expire,data) values(?,?,?,?)', (token, curtime, None, obj)) else: cur.execute( 'replace into cache(id,addtime,expire,data) values(?,?,?,?)', (token, curtime, curtime + response[0], obj)) self.db.commit() cur.close() return response[1] def expire(self, expire): # with rtrCache_lock: cur = self.db.cursor() cur.execute('delete from cache where addtime<?', (int(time.time()) - expire, )) self.db.commit() cur.close() def size(self, size, step=100): # with rtrCache_lock: while True: if os.path.getsize(self.filename) < size: break cur = self.db.cursor() cur.execute('select id from cache order by addtime asc limit ?', (step, )) rows = cur.fetchall() if not rows: cur.close() break cur.execute( 'delete from cache where id in (' + ','.join(len(rows) * '?') + ')', [x[0] for x in rows]) self.db.commit() cur.close() def flush(self): # with rtrCache_lock: cur = self.db.cursor() cur.execute('delete from cache') self.db.commit() cur.close() def _connect(self): with rtrCache_lock: dirname = xbmc.translatePath('special://temp') for subdir in ('xbmcup', 'plugin.video.torrenter'): dirname = os.path.join(dirname, subdir) if not xbmcvfs.exists(dirname): xbmcvfs.mkdir(dirname) self.filename = os.path.join(dirname, self.name) first = False if not xbmcvfs.exists(self.filename): first = True self.db = sqlite.connect(self.filename, check_same_thread=False) if not first: cur = self.db.cursor() try: cur.execute('select version from db_ver') row = cur.fetchone() if not row or float(row[0]) != self.version: cur.execute('drop table cache') cur.execute('drop table if exists db_ver') first = True except: cur.execute('drop table cache') first = True self.db.commit() cur.close() if first and not self.first_time(): cur = self.db.cursor() cur.execute('pragma auto_vacuum=1') cur.execute( 'create table cache(id varchar(255) unique, addtime integer, expire integer, data blob)' ) cur.execute('create index time on cache(addtime asc)') cur.execute('create table db_ver(version real)') cur.execute('insert into db_ver(version) values(?)', (self.version, )) self.db.commit() cur.close() def first_time(self): scrapers = { 'tvdb': 'TheTVDB.com', 'tmdb': 'TheMovieDB.org', 'kinopoisk': 'KinoPoisk.ru' } ok = xbmcgui.Dialog().yesno( Localization.localize('Content Lists'), Localization.localize('Do you want to preload full metadata?') + ' (%s)' % (scrapers[os.path.basename(self.filename).split('.')[0]]), Localization.localize('It is highly recommended!')) if ok: return self.download() else: return False def download(self): dirname = os.path.dirname(self.filename) zipname = os.path.basename(self.filename).replace('.db', '') + '.zip' url = 'http://www.tat-store.ru/torrenter/' + zipname self.http = HTTP() response = self.http.fetch(url, download=os.path.join(dirname, zipname), progress=True) if response.error: return False try: filezip = 
zipfile.ZipFile(os.path.join(dirname, zipname), 'r') filezip.extractall(dirname) filezip.close() except: return False return True
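
# --- Hedged usage sketch ----------------------------------------------------------------
# Cache.get(token, callback, *args) above runs callback(*args) only on a cache miss and
# expects a (timeout, data) pair back, mirroring the scraper methods.  The helper name
# below is illustrative, not part of the module:
def _fetch_movie_profile(movie_id):
    # pretend network call; True means "store without an expiry timestamp"
    return True, {"id": movie_id, "title": "example"}

# cache = Cache('example.db', 1.0)
# profile = cache.get('movie:123', _fetch_movie_profile, '123')
# A second call with the same token is served from sqlite without invoking the callback.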
def attackGET(self, http_res): """This method performs the cross site scripting attack (XSS attack) with method GET""" # copies page = http_res.path params_list = http_res.get_params resp_headers = http_res.headers referer = http_res.referer headers = {} if referer: headers["referer"] = referer # Some PHP scripts doesn't sanitize data coming from $_SERVER['PHP_SELF'] if page not in self.PHP_SELF: evil_req = None if page.endswith("/"): evil_req = HTTP.HTTPResource(page + self.php_self_payload) elif page.endswith(".php"): evil_req = HTTP.HTTPResource(page + "/" + self.php_self_payload) if evil_req is not None: if self.verbose == 2: print(u"+ {0}".format(evil_req.url)) data, http_code = self.HTTP.send( evil_req, headers=headers).getPageCode() if self._validXSSContentType( evil_req) and self.php_self_check in data: self.logR(Vulnerability.MSG_PATH_INJECT, self.MSG_VULN, page) self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url) self.logVuln( category=Vulnerability.XSS, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter="PHP_SELF", info= _("XSS vulnerability found via injection in the resource path" )) self.PHP_SELF.append(page) # page is the url of the script # params_list is a list of [key, value] lists if not params_list: # Do not attack application-type files if not "content-type" in resp_headers: # Sometimes there's no content-type... so we rely on the document extension if (page.split(".")[-1] not in self.allowed) and page[-1] != "/": return elif not "text" in resp_headers["content-type"]: return url = page + "?__XSS__" if url not in self.attackedGET: self.attackedGET.append(url) code = self.random_string() test_url = HTTP.HTTPResource(page + "?" + code) self.GET_XSS[code] = (test_url, "QUERY_STRING") try: resp = self.HTTP.send(test_url, headers=headers) data = resp.getPage() except requests.exceptions.Timeout: data = "" resp = None if code in data: payloads = self.generate_payloads(data, code) for payload in payloads: evil_req = HTTP.HTTPResource(page + "?" + self.HTTP.quote(payload)) if self.verbose == 2: print(u"+ {0}".format(evil_req)) try: resp = self.HTTP.send(evil_req, headers=headers) dat = resp.getPage() except requests.exceptions.Timeout, timeout: dat = "" resp = timeout param_name = "QUERY_STRING" if self._validXSSContentType( evil_req) and dat is not None and len(dat) > 1: if payload.lower() in dat.lower(): self.SUCCESSFUL_XSS[code] = payload self.logVuln( category=Vulnerability.XSS, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter=param_name, info= _("XSS vulnerability found via injection in the query string" )) self.logR(Vulnerability.MSG_QS_INJECT, self.MSG_VULN, page) self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url) # No more payload injection break
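
# --- Hedged sketch (illustration only) --------------------------------------------------
# Before attacking a URL without parameters, the GET attack methods above skip resources
# that are clearly not text/HTML, either by Content-Type or, when that header is missing,
# by file extension.  A standalone rendering of that gate; the default extension list is
# an assumption, the real one lives in self.allowed:
def looks_attackable(page, resp_headers, allowed=("php", "html", "htm", "asp", "aspx")):
    ctype = resp_headers.get("content-type")
    if ctype is None:
        # No Content-Type: fall back to the document extension, but keep directory URLs.
        return page.endswith("/") or page.split(".")[-1].lower() in allowed
    return "text" in ctype

# Example: looks_attackable("/img/logo.png", {}) -> False
#          looks_attackable("/index.php", {"content-type": "text/html"}) -> True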
def attackGET(self, http_res): """This method performs the Blind SQL attack with method GET""" page = http_res.path params_list = http_res.get_params resp_headers = http_res.headers referer = http_res.referer headers = {} if referer: headers["referer"] = referer if not params_list: # Do not attack application-type files if not "content-type" in resp_headers: # Sometimes there's no content-type... so we rely on the document extension if (page.split(".")[-1] not in self.allowed) and page[-1] != "/": return elif not "text" in resp_headers["content-type"]: return pattern_url = page + "?__SQL__" if pattern_url in self.excludedGET: return if pattern_url not in self.attackedGET: self.attackedGET.append(pattern_url) err500 = 0 for payload in self.blind_sql_payloads: if "[VALUE]" in payload: continue payload = self.HTTP.quote( payload.replace("__TIME__", self.TIME_TO_SLEEP)) url = page + "?" + payload evil_req = HTTP.HTTPResource(url) if self.verbose == 2: print(u"+ {0}".format(evil_req.url)) try: resp = self.HTTP.send(evil_req, headers=headers) data, code = resp.getPageCode() except requests.exceptions.Timeout: self.logVuln( category=Vulnerability.BLIND_SQL_INJECTION, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter="QUERY_STRING", info=_("{0} via injection in the query string" ).format(self.MSG_VULN)) self.logR(Vulnerability.MSG_QS_INJECT, self.MSG_VULN, page) self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url) break else: if code == "500" and err500 == 0: err500 = 1 self.logAnom(category=Anomaly.ERROR_500, level=Anomaly.HIGH_LEVEL, request=evil_req, parameter="QUERY_STRING", info=Anomaly.MSG_QS_500) self.logO(Anomaly.MSG_500, page) self.logO(Anomaly.MSG_EVIL_URL, evil_req.url) else: for i in range(len(params_list)): saved_value = params_list[i][1] param_name = self.HTTP.quote(params_list[i][0]) params_list[i][1] = "__SQL__" pattern_url = page + "?" + self.HTTP.encode(params_list) # This field was successfully attacked with a non-blind SQL injection if pattern_url in self.excludedGET: params_list[i][1] = saved_value continue if pattern_url not in self.attackedGET: self.attackedGET.append(pattern_url) err500 = 0 for payload in self.blind_sql_payloads: payload = payload.replace("[VALUE]", saved_value) params_list[i][1] = self.HTTP.quote( payload.replace("__TIME__", self.TIME_TO_SLEEP)) url = page + "?" + self.HTTP.encode(params_list) evil_req = HTTP.HTTPResource(url) if self.verbose == 2: print(u"+ {0}".format(evil_req.url)) try: resp = self.HTTP.send(evil_req, headers=headers) data, code = resp.getPageCode() except requests.exceptions.Timeout: self.logVuln( category=Vulnerability.BLIND_SQL_INJECTION, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter=param_name, info=_("{0} via injection in " "the parameter {1}").format( self.MSG_VULN, param_name)) self.logR(Vulnerability.MSG_PARAM_INJECT, self.MSG_VULN, page, param_name) self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url) # One payload worked. Now jum to next field break else: if code == "500" and err500 == 0: err500 = 1 self.logAnom(category=Anomaly.ERROR_500, level=Anomaly.HIGH_LEVEL, request=evil_req, parameter=param_name, info=Anomaly.MSG_PARAM_500.format( param_name)) self.logO(Anomaly.MSG_500, page) self.logO(Anomaly.MSG_EVIL_URL, evil_req.url) params_list[i][1] = saved_value
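
# --- Hedged sketch (illustration only) --------------------------------------------------
# The blind SQL logic above rewrites "__TIME__" in each payload to the configured sleep
# value and treats a request timeout as evidence of a time-based injection.  A compact
# stand-in; send() is a hypothetical helper that performs the HTTP request:
import requests

def try_time_based(send, payload_template, sleep_seconds="6"):
    # send(payload) must use a client timeout shorter than the injected sleep
    payload = payload_template.replace("__TIME__", sleep_seconds)
    try:
        send(payload)
    except requests.exceptions.Timeout:
        return True      # the database slept: the parameter looks injectable
    return False

# Example template, in the spirit of the module's blind_sql_payloads:
# "sleep(__TIME__)#"  ->  "sleep(6)#"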
def attackGET(self, http_res): """This method performs the file handling attack with method GET""" page = http_res.path params_list = http_res.get_params resp_headers = http_res.headers referer = http_res.referer headers = {} if referer: headers["referer"] = referer if not params_list: # Do not attack application-type files if not "content-type" in resp_headers: # Sometimes there's no content-type... so we rely on the document extension if (page.split(".")[-1] not in self.allowed) and page[-1] != "/": return elif not "text" in resp_headers["content-type"]: return timeouted = False warn = 0 inc = 0 err500 = 0 for payload in self.payloads: if "[VALUE]" in payload or "[DIRVALUE]" in payload or "[FILE_NAME]" in payload: continue err = "" url = page + "?" + self.HTTP.quote(payload) if url not in self.attackedGET: if self.verbose == 2: print(u"+ {0}".format(url)) self.attackedGET.append(url) evil_req = HTTP.HTTPResource(url) try: data, code = self.HTTP.send( evil_req, headers=headers).getPageCode() except requests.exceptions.Timeout: # Display a warning about timeout only once for a parameter if timeouted: continue data = "" code = "408" err = "" self.logAnom(category=Anomaly.RES_CONSUMPTION, level=Anomaly.MEDIUM_LEVEL, request=evil_req, info=Anomaly.MSG_QS_TIMEOUT) self.logO(Anomaly.MSG_TIMEOUT, page) self.logO(Anomaly.MSG_EVIL_URL, evil_req.url) timeouted = True else: err, inc, warn = self.__findPatternInResponse( data, warn) if err != "": self.logVuln( category=Vulnerability.FILE_HANDLING, level=Vulnerability.HIGH_LEVEL, request=evil_req, info=_("{0} via injection in the query string" ).format(err)) self.logR(Vulnerability.MSG_QS_INJECT, err) self.logR(Vulnerability.MSG_EVIL_URL) if inc: break else: if code == "500" and err500 == 0: err500 = 1 self.logAnom(category=Anomaly.ERROR_500, level=Anomaly.HIGH_LEVEL, request=evil_req, info=Anomaly.MSG_QS_500) self.logO(Anomaly.MSG_500, evil_req.path) self.logO(Anomaly.MSG_EVIL_URL, evil_req.url) for i in range(len(params_list)): timeouted = False warn = 0 inc = 0 err500 = 0 param_name = self.HTTP.quote(params_list[i][0]) saved_value = params_list[i][1] for payload in self.payloads: err = "" payload = payload.replace('[VALUE]', saved_value) payload = payload.replace('[DIRVALUE]', saved_value.rsplit('/', 1)[0]) payload = payload.replace('[FILE_NAME]', http_res.file_name) params_list[i][1] = self.HTTP.quote(payload) url = page + "?" 
+ self.HTTP.encode(params_list) if url not in self.attackedGET: if self.verbose == 2: print(u"+ {0}".format(url)) self.attackedGET.append(url) evil_req = HTTP.HTTPResource(url) try: data, code = self.HTTP.send( evil_req, headers=headers).getPageCode() except requests.exceptions.Timeout: if timeouted: continue data = "" code = "408" err = "" self.logAnom( category=Anomaly.RES_CONSUMPTION, level=Anomaly.MEDIUM_LEVEL, request=evil_req, parameter=param_name, info=Anomaly.MSG_PARAM_TIMEOUT.format(param_name)) self.logO(Anomaly.MSG_TIMEOUT, page) self.logO(Anomaly.MSG_EVIL_URL, evil_req.url) timeouted = True else: err, inc, warn = self.__findPatternInResponse( data, warn) if err != "": self.logVuln( category=Vulnerability.FILE_HANDLING, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter=param_name, info=_("{0} via injection in the parameter {1}" ).format(err, param_name)) self.logR(Vulnerability.MSG_PARAM_INJECT, err, page, param_name) self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url) if inc: break else: if code == "500" and err500 == 0: err500 = 1 self.logAnom( category=Anomaly.ERROR_500, level=Anomaly.HIGH_LEVEL, request=evil_req, parameter=param_name, info=Anomaly.MSG_PARAM_500.format(param_name)) self.logO(Anomaly.MSG_500, evil_req.path) self.logO(Anomaly.MSG_EVIL_URL, evil_req.url) params_list[i][1] = saved_value
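
# --- Hedged sketch (illustration only) --------------------------------------------------
# The file-handling payloads above may embed "[VALUE]", "[DIRVALUE]" and "[FILE_NAME]"
# placeholders that are rebound for every parameter before the request is sent.  A
# standalone rendering of that substitution:
def expand_payload(payload, current_value, file_name):
    payload = payload.replace("[VALUE]", current_value)
    payload = payload.replace("[DIRVALUE]", current_value.rsplit("/", 1)[0])
    payload = payload.replace("[FILE_NAME]", file_name)
    return payload

# Example:
# expand_payload("[DIRVALUE]/../../etc/passwd", "uploads/avatars/img.png", "index.php")
# -> "uploads/avatars/../../etc/passwd"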
def attackGET(self, http_res): """This method performs the CRLF attack with method GET""" page = http_res.path params_list = http_res.get_params resp_headers = http_res.headers referer = http_res.referer headers = {} if referer: headers["referer"] = referer payload = self.HTTP.quote( "http://www.google.fr\r\nwapiti: SVN version") if not params_list: # Do not attack application-type files if not "content-type" in resp_headers: # Sometimes there's no content-type... so we rely on the document extension if (page.split(".")[-1] not in self.allowed) and page[-1] != "/": return elif not "text" in resp_headers["content-type"]: return url = page + "?" + payload if url not in self.attackedGET: evil_req = HTTP.HTTPResource(url) if self.verbose == 2: print(u"+ {0}".format(evil_req.url)) try: resp = self.HTTP.send(evil_req, headers=headers) if "wapiti" in resp.getHeaders(): self.logVuln(category=Vulnerability.CRLF, level=Vulnerability.HIGH_LEVEL, request=evil_req, info=self.MSG_VULN + " " + _("(QUERY_STRING)")) self.logR(Vulnerability.MSG_QS_INJECT, self.MSG_VULN, page) self.logR(Vulnerability.MSG_EVIL_URL, url) except requests.exceptions.Timeout: self.logAnom(category=Anomaly.RES_CONSUMPTION, level=Anomaly.MEDIUM_LEVEL, request=evil_req, info=self.MSG_VULN + " " + _("(QUERY_STRING)")) self.logO(Anomaly.MSG_TIMEOUT, page) self.logO(Anomaly.MSG_EVIL_URL, url) except requests.exceptions.HTTPError: # print("Error: The server did not understand this request") pass self.attackedGET.append(url) else: for i in range(len(params_list)): saved_value = params_list[i][1] # payload is already escaped, see at top params_list[i][1] = payload param_name = self.HTTP.quote(params_list[i][0]) url = page + "?" + self.HTTP.encode(params_list) if url not in self.attackedGET: self.attackedGET.append(url) evil_req = HTTP.HTTPResource(url) if self.verbose == 2: print(u"+ {0}".format(evil_req.url)) try: resp = self.HTTP.send(evil_req, headers=headers) if "wapiti" in resp.getHeaders(): self.logVuln(category=Vulnerability.CRLF, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter=param_name, info=self.MSG_VULN + " (" + param_name + ")") self.logR(Vulnerability.MSG_PARAM_INJECT, self.MSG_VULN, page, param_name) self.logR(Vulnerability.MSG_EVIL_URL, url) except requests.exceptions.Timeout: self.logAnom(category=Anomaly.RES_CONSUMPTION, level=Anomaly.MEDIUM_LEVEL, request=evil_req, parameter=param_name, info="Timeout (" + param_name + ")") self.logO(Anomaly.MSG_TIMEOUT, page) self.logO(Anomaly.MSG_EVIL_URL, url) except requests.exceptions.HTTPError: self.log( _("Error: The server did not understand this request" )) params_list[i][1] = saved_value
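
# --- Hedged sketch (illustration only) --------------------------------------------------
# The CRLF check above injects an URL-encoded "\r\nwapiti: ..." sequence and reports a
# vulnerability when the fake "wapiti" header comes back in the response headers.  A
# standalone version using plain requests; the target URL and parameter are hypothetical:
import requests

def has_crlf_injection(url, param):
    payload = "http://www.google.fr\r\nwapiti: SVN version"
    try:
        resp = requests.get(url, params={param: payload}, timeout=10)
    except requests.exceptions.RequestException:
        return False
    # Header splitting succeeded if the injected header name survived into the response.
    return "wapiti" in resp.headers

# Example: has_crlf_injection("http://target.example/redirect.php", "url")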
def attackPOST(self, form): """This method performs the Blind SQL attack with method POST""" # copies get_params = form.get_params post_params = form.post_params file_params = form.file_params referer = form.referer for params_list in [get_params, post_params, file_params]: for i in xrange(len(params_list)): saved_value = params_list[i][1] param_name = self.HTTP.quote(params_list[i][0]) if params_list is file_params: params_list[i][1] = ["_SQL__", params_list[i][1][1]] else: params_list[i][1] = "__SQL__" attack_pattern = HTTP.HTTPResource(form.path, method=form.method, get_params=get_params, post_params=post_params, file_params=file_params) if attack_pattern in self.excludedPOST: params_list[i][1] = saved_value continue err500 = 0 if attack_pattern not in self.attackedPOST: self.attackedPOST.append(attack_pattern) for payload in self.blind_sql_payloads: if params_list is file_params: payload = payload.replace("[VALUE]", saved_value[0]) params_list[i][1][0] = payload.replace( "__TIME__", self.TIME_TO_SLEEP) else: payload = payload.replace("[VALUE]", saved_value) params_list[i][1] = payload.replace( "__TIME__", self.TIME_TO_SLEEP) evil_req = HTTP.HTTPResource(form.path, method=form.method, get_params=get_params, post_params=post_params, file_params=file_params, referer=referer) if self.verbose == 2: print(u"+ {0}".format(evil_req)) try: resp = self.HTTP.send(evil_req) data, code = resp.getPageCode() except requests.exceptions.Timeout: # Timeout means time-based SQL injection self.logVuln( category=Vulnerability.BLIND_SQL_INJECTION, level=Vulnerability.HIGH_LEVEL, request=evil_req, parameter=param_name, info=_("{0} via injection in the " "parameter {1}").format( self.MSG_VULN, param_name)) self.logR(Vulnerability.MSG_PARAM_INJECT, self.MSG_VULN, evil_req.url, param_name) self.logR(Vulnerability.MSG_EVIL_REQUEST) self.logC(evil_req.http_repr) print('') break else: if code == "500" and err500 == 0: err500 = 1 self.logAnom(category=Anomaly.ERROR_500, level=Anomaly.HIGH_LEVEL, request=evil_req, parameter=param_name, info=Anomaly.MSG_PARAM_500.format( param_name)) self.logO(Anomaly.MSG_500, evil_req.url) self.logO(Anomaly.MSG_EVIL_REQUEST) self.logC(evil_req.http_repr) print('') params_list[i][1] = saved_value
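
# --- Hedged sketch (illustration only) --------------------------------------------------
# attackedPOST / excludedPOST above store a canonical "attack pattern" (the form with the
# current field replaced by "__SQL__") so the same field is never attacked twice and
# fields already proven injectable by the non-blind module are skipped.  A simplified
# stand-in using tuples instead of HTTP.HTTPResource:
def attack_pattern(path, method, params, index, marker="__SQL__"):
    masked = [(k, marker if i == index else v) for i, (k, v) in enumerate(params)]
    return (path, method, tuple(masked))

seen = set()
pattern = attack_pattern("/login.php", "POST", [("user", "bob"), ("pwd", "x")], 1)
if pattern not in seen:
    seen.add(pattern)   # first time this field is targeted -> run the payload loop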
class LibraryManager(): def __init__(self, dest_path, platform): self.dest_path = dest_path self.platform = platform self.root = os.path.dirname(os.path.dirname(__file__)) def check_exist(self): for libname in get_libname(self.platform): if not xbmcvfs.exists(os.path.join(self.dest_path, libname)): return False return True def check_update(self): need_update = False for libname in get_libname(self.platform): if libname != 'liblibtorrent.so': self.libpath = os.path.join(self.dest_path, libname) self.sizepath = os.path.join(self.root, self.platform['system'], self.platform['version'], libname + '.size.txt') size = str(os.path.getsize(self.libpath)) size_old = open(self.sizepath, "r").read() if size_old != size: need_update = True return need_update def update(self): if self.check_update(): for libname in get_libname(self.platform): self.libpath = os.path.join(self.dest_path, libname) xbmcvfs.delete(self.libpath) self.download() def download(self): __settings__ = xbmcaddon.Addon(id='plugin.video.alfa') ### Alfa xbmcvfs.mkdirs(self.dest_path) for libname in get_libname(self.platform): dest = os.path.join(self.dest_path, libname) log("try to fetch %s" % libname) url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'], self.platform['version'], libname) if libname != 'liblibtorrent.so': try: self.http = HTTP() self.http.fetch(url, download=dest + ".zip", progress=False) ### Alfa log("%s -> %s" % (url, dest)) xbmc.executebuiltin( 'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True) xbmcvfs.delete(dest + ".zip") except: text = 'Failed download %s!' % libname xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__, text, 750, __icon__)) else: xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest, silent=True) ### Alfa dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Path')), \ 'lib', libname) ### Alfa xbmcvfs.copy(dest, dest_alfa, silent=True) ### Alfa dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Profile')), \ 'custom_code', 'lib', libname) ### Alfa xbmcvfs.copy(dest, dest_alfa, silent=True) ### Alfa return True def android_workaround(self, new_dest_path): ### Alfa (entera) import subprocess for libname in get_libname(self.platform): libpath = os.path.join(self.dest_path, libname) size = str(os.path.getsize(libpath)) new_libpath = os.path.join(new_dest_path, libname) if xbmcvfs.exists(new_libpath): new_size = str(os.path.getsize(new_libpath)) if size != new_size: xbmcvfs.delete(new_libpath) if xbmcvfs.exists(new_libpath): try: command = ['su', '-c', 'rm', '%s' % new_libpath] p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output_cmd, error_cmd = p.communicate() log('Comando ROOT: %s' % str(command)) except: log('Sin PERMISOS ROOT: %s' % str(command)) if not xbmcvfs.exists(new_libpath): log('Deleted: (%s) %s -> (%s) %s' % (size, libpath, new_size, new_libpath)) if not xbmcvfs.exists(new_libpath): xbmcvfs.copy(libpath, new_libpath, silent=True) ### ALFA log('Copying... 
%s -> %s' % (libpath, new_libpath)) if not xbmcvfs.exists(new_libpath): try: command = [ 'su', '-c', 'cp', '%s' % libpath, '%s' % new_libpath ] p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output_cmd, error_cmd = p.communicate() log('Comando ROOT: %s' % str(command)) command = [ 'su', '-c', 'chmod', '775', '%s' % new_libpath ] p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output_cmd, error_cmd = p.communicate() log('Comando ROOT: %s' % str(command)) except: log('Sin PERMISOS ROOT: %s' % str(command)) if not xbmcvfs.exists(new_libpath): log('ROOT Copy Failed!') else: command = ['chmod', '775', '%s' % new_libpath] p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output_cmd, error_cmd = p.communicate() log('Comando: %s' % str(command)) else: log('Module exists. Not copied... %s' % new_libpath) ### ALFA return new_dest_path
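
# --- Hedged sketch (illustration only) --------------------------------------------------
# The Android workaround above falls back to root ("su -c cp" followed by
# "su -c chmod 775") when xbmcvfs cannot write the target directory.  A trimmed
# standalone version of that fallback:
import subprocess

def root_copy(src, dst):
    for command in (['su', '-c', 'cp', src, dst],
                    ['su', '-c', 'chmod', '775', dst]):
        p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        if p.returncode != 0:
            return False    # no root permissions (or the copy failed)
    return True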
class KinoPoisk: """ API: scraper - скрапер movie - профайл фильма search - поиск фильма best - поиск лучших фильмов person - поиск персон work - информация о работах персоны """ def __init__(self): self.cache = Cache('kinopoisk.db', 1.0) self.html = Clear() self.http = HTTP() self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3', 'Cache-Control': 'no-cache', 'Referer': 'http://www.kinopoisk.ru/level/7/' } # API def scraper(self, search, year=None, trailer_quality=None): try: if isinstance(search, list): search = search[0] or "" tag = 'scraper:' + urllib.quote_plus(search.encode('windows-1251')) except Exception: return None else: if year: tag += ':' + str(year) id = self.cache.get(tag, self._scraper, search, year) if not id: return None return self.movie(id, trailer_quality) def movie(self, id, trailer_quality=None): id = str(id) if trailer_quality is None: trailer_quality = 6 movie = self.cache.get('movie:' + id, self._movie, id) if not movie: return None if 'trailers' in movie and movie['trailers']: # компилируем список с нужным нам качеством video = [] for m in movie['trailers']: url = [x for x in m['video'] if x[0] <= trailer_quality] if url: m['video'] = url[-1] video.append(m) movie['trailers'] = video if movie['trailers']: # готовим главный трейлер r = [x for x in movie['trailers'] if x['trailer']] if r: movie['info']['trailer'] = r[0]['video'][1] else: # если трейлер не найден, то отдаем что попало... movie['info']['trailer'] = movie['trailers'][0]['video'][1] return movie def search(self, name, trailer_quality=None): return self._search_movie(name) def best(self, **kwarg): page = kwarg.get('page', 1) limit = kwarg.get('limit', 50) url = 'http://www.kinopoisk.ru/top/navigator/m_act%5Bis_film%5D/on/m_act%5Bnum_vote%5D/' + str(kwarg.get('votes', 100)) + '/' if kwarg.get('dvd'): url += 'm_act%5Bis_dvd%5D/on/' if kwarg.get('decade'): url += 'm_act%5Bdecade%5D/' + str(kwarg['decade']) + '/' if kwarg.get('genre'): url += 'm_act%5Bgenre%5D/' + str(GENRE[kwarg['genre']]) + '/' if kwarg.get('country'): url += 'm_act%5Bcountry%5D/' + str(kwarg['country']) + '/' if kwarg.get('rate'): url += 'm_act%5Brating%5D/' + str(kwarg['rate']) + ':/' if kwarg.get('mpaa'): url += 'm_act%5Bmpaa%5D/' + str(kwarg['mpaa']) + '/' url += 'perpage/' + str(limit) + '/order/ex_rating/' if page > 1: url += 'page/' + str(page) + '/' response = self.http.fetch(url, headers=self.headers) if response.error: return None res = {'pages': (1, 0, 1, 0), 'data': []} r = re.compile('<div class="pagesFromTo(.+?)<div class="pagesFromTo', re.U | re.S).search(response.body.decode('windows-1251')) if r: body = r.group(1) # compile pagelist p = re.compile('>([0-9]+)—[0-9]+[^0-9]+?([0-9]+)', re.U).search(body) if p: page = (int(p.group(1)) - 1) / limit + 1 total = int(p.group(2)) pages = total / limit if limit * pages != total: pages += 1 res['pages'] = (pages, 0 if page == 1 else page - 1, page, 0 if page == pages else page + 1) # end compile for id in re.compile('<div id="tr_([0-9]+)"', re.U | re.S).findall(body): res['data'].append(int(id)) return res def person(self, name): response = self.http.fetch('http://www.kinopoisk.ru/s/type/people/list/1/find/' + urllib.quote_plus(name.encode('windows-1251')) + '/order/relevant/', headers=self.headers) if response.error: return None res = [] body = re.compile('<div class="navigator">(.+?)<div 
class="navigator">', re.U | re.S).search(response.body.decode('windows-1251')) if body: for block in re.compile('<p class="pic">(.+?)<div class="clear">', re.U | re.S).findall(body.group(1)): id, name, original, year, poster = None, None, None, None, None r = re.compile('<p class="name"><a href="http://www\.kinopoisk\.ru/level/4/people/([0-9]+)[^>]+>([^<]+)</a>', re.U | re.S).search(block) if r: id = r.group(1) name = r.group(2).strip() if id and name: r = re.compile('<span class="gray">([^<]+)</span>', re.U | re.S).search(block) if r: original = r.group(1).strip() if not original: original = None r = re.compile('<span class="year">([0-9]{4})</span>', re.U | re.S).search(block) if r: year = int(r.group(1)) if block.find('no-poster.gif') == -1: poster = 'http://st.kinopoisk.ru/images/actor/' + id + '.jpg' res.append({'id': int(id), 'name': name, 'originalname': original, 'year': year, 'poster': poster}) return {'pages': (1, 0, 1, 0), 'data': res} def work(self, id): response = self.http.fetch('http://www.kinopoisk.ru/name/' + str(id) + '/', headers=self.headers) if response.error: return None res = {} r = re.compile('id="sort_block">(.+?)<style>', re.U | re.S).search(response.body.decode('windows-1251')) if r: for block in r.group(1).split(u'<table cellspacing="0" cellpadding="0" border="0" width="100%">'): work = None for w in ('actor', 'director', 'writer', 'producer', 'producer_ussr', 'composer', 'operator', 'editor', 'design', 'voice', 'voice_director'): if block.find(u'id="' + w + u'"') != -1: work = 'producer' if w == 'producer_ussr' else w break if work: movies = [] for id, name in re.compile('<span class="name"><a href="/film/([0-9]+)/" >([^<]+?)</a>', re.U).findall(block): for tag in (u'(мини-сериал)', u'(сериал)'): if name.find(tag) != -1: break else: movies.append(int(id)) if movies: res.setdefault(work, []).extend(movies) return res def review(self, id, query): query_s = 'all' if query == 'stat' else query data = self.cache.get('review:' + str(id) + ':' + query_s, self._review, id, query_s) if not data: return data return data[query] def countries(self): return COUNTRIES def country(self, id, default=None): country = [x[1] for x in COUNTRIES if x[0] == id] return country[0] if country else default # PRIVATE def _search_movie(self, name, year=None): url = 'http://www.kinopoisk.ru/s/type/film/list/1/find/' + urllib.quote_plus(name.encode('windows-1251')) # + '/order/relevant' if year: url += '/m_act%5Byear%5D/' + str(year) url += '/m_act%5Btype%5D/film/' response = self.http.fetch(url, headers=self.headers) if response.error: return None res = [] r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search(response.body.decode('windows-1251')) if r: for id in re.compile('<p class="name"><a href="/level/1/film/([0-9]+)', re.U | re.S).findall(r.group(1)): res.append(int(id)) return {'pages': (1, 0, 1, 0), 'data': res} def _scraper(self, name, year): timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if year and year >= time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 # week ids = self._search_movie(name, year) if ids is None: return False, None elif not ids['data']: # сохраняем пустой результат на 3-е суток return 259200, None else: return timeout, ids['data'][0] def _review(self, id, query): url = 'http://www.kinopoisk.ru/film/' + str(id) + '/ord/rating/' if query in ('good', 'bad', 'neutral'): url += 'status/' + query + '/' url += 'perpage/200/' response = self.http.fetch(url, 
headers=self.headers) if response.error: return False, None html = response.body.decode('windows-1251') res = { 'stat': {'all': 0, 'good': 0, 'bad': 0, 'neutral': 0}, query: [] } r = re.compile('<ul class="resp_type">(.+?)</ul>', re.U | re.S).search(html) if r: ul = r.group(1) for q, t in (('pos', 'good'), ('neg', 'bad'), ('neut', 'neutral')): r = re.compile('<li class="' + q + '"><a href="[^>]+>[^<]+</a><b>([0-9]+)</b></li>', re.U).search(ul) if r: res['stat'][t] = int(r.group(1)) res['stat']['all'] = res['stat']['good'] + res['stat']['bad'] + res['stat']['neutral'] r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search(html) if r: for block in r.group(1).split('itemprop="reviews"'): review = { 'nick': None, 'count': None, 'title': None, 'review': None, 'time': None } r = re.compile('itemprop="reviewBody">(.+?)</div>', re.U | re.S).search(block) if r: text = r.group(1) for tag1, tag2 in ((u'<=end=>', u'\n'), (u'<b>', u'[B]'), (u'</b>', u'[/B]'), (u'<i>', u'[I]'), (u'</i>', u'[/I]'), (u'<u>', u'[U]'), (u'</u>', u'[/U]')): text = text.replace(tag1, tag2) r = self.html.text(text) if r: review['review'] = r user = None r = re.compile('<p class="profile_name"><s></s><a href="[^>]+>([^<]+)</a></p>').search(block) if r: user = self.html.string(r.group(1)) else: r = re.compile('<p class="profile_name"><s></s>([^<]+)</p>').search(block) if r: user = self.html.string(r.group(1)) if user: review['nick'] = user r = re.compile('<p class="sub_title"[^>]+>([^<]+)</p>').search(block) if r: title = self.html.string(r.group(1)) if title: review['title'] = title r = re.compile('<span class="date">([^<]+)</span>', re.U | re.S).search(block) if r: review['time'] = r.group(1).replace(u' |', u',') r = re.compile(u'<a href="[^>]+>рецензии \(([0-9]+)\)</a>', re.U | re.S).search(block) if r: review['count'] = int(r.group(1)) if review['nick'] and review['review']: res[query].append(review) return 3600, res # one hour def _movie(self, id): response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/', headers=self.headers) if response.error: return False, None html = response.body.decode('windows-1251') res = { 'icon': None, 'thumbnail': None, 'info': { 'count': int(id) }, 'properties': { 'fanart_image': None, }, } # имя, оригинальное имя, девиз, цензура, год, top250 # runtime - длительность фильма (в отдельную переменную, иначе не видно размер файла) for tag, reg, t in ( ('title', '<title>(.+?)</title>', 'str'), ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', 'str'), ('tagline', '<td style="color\: #555">«(.+?)»</td></tr>', 'str'), ('mpaa', 'itemprop="contentRating"\s+content="MPAA\s+([^"]+)"', 'str'), ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', 'str'), ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', 'int'), ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', 'int') ): r = re.compile(reg, re.U).search(html) if r: value = r.group(1).strip() if value: res['info'][tag] = value if t == 'int': res['info'][tag] = int(res['info'][tag]) else: res['info'][tag] = self.html.string(res['info'][tag]) # режисеры, сценаристы, жанры for tag, reg in ( ('director', u'<td itemprop="director">(.+?)</td>'), ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'), ('genre', u'<td itemprop="genre">(.+?)</td>') ): r = re.compile(reg, re.U | re.S).search(html) if r: r2 = [] for r in re.compile('<a href="[^"]+">([^<]+)</a>', re.U).findall(r.group(1)): r = self.html.string(r) if r and r != '...': r2.append(r) 
if r2: res['info'][tag] = u', '.join(r2) # актеры r = re.compile(u'<h4>В главных ролях:</h4><ul>(.+?)</ul>', re.U | re.S).search(html) if r: actors = [] for r in re.compile('<li itemprop="actors"><a [^>]+>([^<]+)</a></li>', re.U).findall(r.group(1)): r = self.html.string(r) if r and r != '...': actors.append(r) if actors: res['info']['cast'] = actors[:] # res['info']['castandrole'] = actors[:] # описание фильма r = re.compile('<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>', re.U).search(html) if r: plot = self.html.text(r.group(1).replace('<=end=>', '\n')) if plot: res['info']['plot'] = plot # IMDB r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html) if r: res['info']['rating'] = float(r.group(1).strip()) res['info']['votes'] = r.group(2).strip() # # премьера # r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U|re.S).search(html) # if r: # r = re.compile(u'data\-ical\-date="([^"]+)"', re.U|re.S).search(r.group(1)) # if r: # data = r.group(1).split(' ') # if len(data) == 3: # i = 0 # for mon in (u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря'): # i += 1 # if mon == data[1]: # mon = str(i) # if len(mon) == 1: # mon = '0' + mon # day = data[0] # if len(day) == 1: # day = '0' + day # res['info']['premiered'] = '-'.join([data[2], mon, day]) # break # постер r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)', re.U | re.S).search(html) if r: poster = r.group(1).replace("'", '').strip() if poster: if poster.startswith("/"): poster = "http://www.kinopoisk.ru%s" % poster res['icon'] = poster res['thumbnail'] = poster menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)</ul>', re.U | re.S).search(html) if menu: menu = menu.group(1) # фанарт if menu.find('/film/' + id + '/wall/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/wall/', headers=self.headers) if not response.error: html = response.body.decode('windows-1251') fanart = re.compile('<a href="/picture/([0-9]+)/w_size/([0-9]+)/">', re.U).findall(html) if fanart: fanart.sort(cmp=lambda (id1, size1), (id2, size2): cmp(int(size1), int(size2))) # пробуем взять максимально подходящее fanart_best = [x for x in fanart if int(x[1]) <= 1280] if fanart_best: fanart = fanart_best response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/', headers=self.headers) if not response.error: html = response.body.decode('windows-1251') r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html) if r: res['properties']['fanart_image'] = r.group(1).strip() # если нет фанарта (обоев), то пробуем получить кадры if not res['properties']['fanart_image'] and menu.find('/film/' + id + '/stills/') != -1: response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/stills/', headers=self.headers) if not response.error: html = response.body.decode('windows-1251') fanart = re.compile('<a href="/picture/([0-9]+)/"><img src="[^<]+</a>[^<]+<b><i>([0-9]+)×([0-9]+)</i>', re.U).findall(html) if fanart: fanart.sort(cmp=lambda (id1, size1, t1), (id2, size2, t2): cmp(int(size1), int(size2))) # пробуем взять максимально подходящее fanart_best = [x for x in fanart if int(x[1]) <= 1280 and int(x[1]) > int(x[2])] if fanart_best: fanart = fanart_best response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/', headers=self.headers) if not response.error: html = response.body.decode('windows-1251') r = 
re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html) if r: res['properties']['fanart_image'] = r.group(1).strip() # # студии # if menu.find('/film/' + id + '/studio/') != -1: # response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers) # if not response.error: # html = response.body.decode('windows-1251') # r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U|re.S).search(html) # if r: # studio = [] # for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)): # r = self.html.string(r) # if r: # studio.append(r) # if studio: # res['info']['studio'] = u', '.join(studio) # трэйлеры # trailers1 = [] # русские трейлеры # trailers2 = [] # другие русские видео # trailers3 = [] # трейлеры # trailers4 = [] # другие видео # if menu.find('/film/' + id + '/video/') != -1: # response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/video/', headers=self.headers) # if not response.error: # html = response.body.decode('windows-1251') # for row in re.compile(u'<!-- ролик -->(.+?)<!-- /ролик -->', re.U|re.S).findall(html): # # отсекаем лишние блоки # if row.find(u'>СМОТРЕТЬ</a>') != -1: # # русский ролик? # if row.find('class="flag flag2"') == -1: # is_ru = False # else: # is_ru = True # # получаем имя трейлера # r = re.compile('<a href="/film/' + id + '/video/[0-9]+/[^>]+ class="all">(.+?)</a>', re.U).search(row) # if r: # name = self.html.string(r.group(1)) # if name: # trailer = { # 'name': name, # 'time': None, # 'trailer': False, # 'ru': is_ru, # 'video': [] # } # # трейлер или тизер? # for token in (u'Трейлер', u'трейлер', u'Тизер', u'тизер'): # if name.find(token) != -1: # trailer['trailer'] = True # break # # получаем время трейлера # r = re.compile(u'clock.gif"[^>]+></td>\s*<td style="color\: #777">[^0-9]*([0-9\:]+)</td>', re.U|re.S).search(row) # if r: # trailer['time'] = r.group(1).strip() # # делим ролики по качеству # for r in re.compile('trailer/([1-3])a.gif"(.+?)link=([^"]+)" class="continue">.+?<td style="color\:#777">([^<]+)</td>\s*</tr>', re.U|re.S).findall(row): # quality = int(r[0]) # if r[1].find('icon-hd') != -1: # quality += 3 # trailer['video'].append((quality, r[2].strip(), r[3])) # if trailer['video']: # if trailer['ru']: # if trailer['trailer']: # trailers1.append(trailer) # else: # trailers2.append(trailer) # else: # if trailer['trailer']: # trailers3.append(trailer) # else: # trailers4.append(trailer) # # склеиваем трейлеры # res['trailers'].extend(trailers1) # res['trailers'].extend(trailers2) # res['trailers'].extend(trailers3) # res['trailers'].extend(trailers4) timeout = True # если фильм свежий, то кладем в кэш НЕ на долго (могут быть обновления на сайте) if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year: timeout = 7 * 24 * 60 * 60 # week return timeout, res
class LibraryManager(): def __init__(self, dest_path, platform): self.dest_path = dest_path self.platform = platform self.root = os.path.dirname(os.path.dirname(__file__)) def check_exist(self): for libname in get_libname(self.platform): if not xbmcvfs.exists(os.path.join(self.dest_path, libname)): return False return True def check_update(self): need_update = False for libname in get_libname(self.platform): if libname != 'liblibtorrent.so': self.libpath = os.path.join(self.dest_path, libname) self.sizepath = os.path.join(self.root, self.platform['system'], self.platform['version'], libname + '.size.txt') size = str(os.path.getsize(self.libpath)) size_old = open(self.sizepath, "r").read() if size_old != size: need_update = True return need_update def update(self): if self.check_update(): for libname in get_libname(self.platform): self.libpath = os.path.join(self.dest_path, libname) xbmcvfs.delete(self.libpath) self.download() def download(self): xbmcvfs.mkdirs(self.dest_path) for libname in get_libname(self.platform): dest = os.path.join(self.dest_path, libname) log("try to fetch %s" % libname) url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'], self.platform['version'], libname) if libname != 'liblibtorrent.so': try: self.http = HTTP() self.http.fetch(url, download=dest + ".zip", progress=True) log("%s -> %s" % (url, dest)) xbmc.executebuiltin( 'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True) xbmcvfs.delete(dest + ".zip") except: text = 'Failed download %s!' % libname xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__, text, 750, __icon__)) else: xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest) return True def android_workaround(self, new_dest_path): for libname in get_libname(self.platform): libpath = os.path.join(self.dest_path, libname) size = str(os.path.getsize(libpath)) new_libpath = os.path.join(new_dest_path, libname) if not xbmcvfs.exists(new_libpath): xbmcvfs.copy(libpath, new_libpath) log('Copied %s -> %s' % (libpath, new_libpath)) else: new_size = str(os.path.getsize(new_libpath)) if size != new_size: xbmcvfs.delete(new_libpath) xbmcvfs.copy(libpath, new_libpath) log('Deleted and copied (%s) %s -> (%s) %s' % (size, libpath, new_size, new_libpath)) return new_dest_path
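# A simplified, self-contained sketch of the size-file comparison performed by
# LibraryManager.check_update(): the expected size of each shipped library is
# recorded in "<libname>.size.txt", and a mismatch with the file on disk marks
# the library as stale so update() deletes and re-downloads it. Paths and the
# get_libname() helper are assumed from the surrounding module; unlike the
# original, this sketch treats a missing file or size record as "needs update".
import os

def library_needs_update(libpath, sizepath):
    try:
        expected = open(sizepath, "r").read().strip()
        actual = str(os.path.getsize(libpath))
    except (IOError, OSError):
        return True  # missing library or size record: force a re-download
    return expected != actual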
# Search for permanent XSS vulns which were injected via GET if self.doGET == 1: for code in self.GET_XSS: if code in data: # code found in the webpage ! code_url = self.GET_XSS[code][0].url page = self.GET_XSS[code][0].path param_name = self.GET_XSS[code][1] if code in self.SUCCESSFUL_XSS: # is this an already known vuln (reflected XSS) if self.validXSS(data, code, self.SUCCESSFUL_XSS[code]): # if we can find the payload again, this is a stored XSS evil_req = HTTP.HTTPResource( code_url.replace( code, self.SUCCESSFUL_XSS[code])) if param_name == "QUERY_STRING": self.logR(Vulnerability.MSG_QS_INJECT, self.MSG_VULN, page) else: self.logR(Vulnerability.MSG_PARAM_INJECT, self.MSG_VULN, page, param_name) self.logR(Vulnerability.MSG_EVIL_URL, code_url) self.logVuln( category=Vulnerability.XSS, level=Vulnerability.HIGH_LEVEL, request=evil_req, info=_("Found permanent XSS in {0}"
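# A rough sketch of the bookkeeping behind the stored-XSS check above (names and
# structures are illustrative, not the scanner's API): every taint string
# injected through a GET parameter is remembered together with the request that
# carried it; if such a taint, and the payload that already proved exploitable
# for it, later shows up in a different page's body, the injection persisted
# server-side and is reported as permanent (stored) XSS rather than reflected.
def find_stored_xss(page_body, injected, successful):
    """injected: {code: (origin_url, param_name)}, successful: {code: payload}"""
    findings = []
    for code, (origin_url, param_name) in injected.items():
        if code in page_body and code in successful and successful[code] in page_body:
            findings.append({'origin_url': origin_url,
                             'parameter': param_name,
                             'payload': successful[code]})
    return findings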