Example No. 1
 def download(self):
     __settings__ = xbmcaddon.Addon(id='plugin.video.alfa')  ### Alfa
     xbmcvfs.mkdirs(self.dest_path)
     for libname in get_libname(self.platform):
         dest = os.path.join(self.dest_path, libname)
         log("try to fetch %s" % libname)
         url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'],
                                    self.platform['version'], libname)
         if libname != 'liblibtorrent.so':
             try:
                 self.http = HTTP()
                 self.http.fetch(url,
                                 download=dest + ".zip",
                                 progress=False)  ### Alfa
                 log("%s -> %s" % (url, dest))
                 xbmc.executebuiltin(
                     'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path),
                     True)
                 xbmcvfs.delete(dest + ".zip")
             except:
                 text = 'Failed download %s!' % libname
                 xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" %
                                     (__plugin__, text, 750, __icon__))
         else:
             xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'),
                          dest,
                          silent=True)  ### Alfa
         dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Path')),
                                  'lib', libname)  ### Alfa
         xbmcvfs.copy(dest, dest_alfa, silent=True)  ### Alfa
         dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Profile')),
                                  'custom_code', 'lib', libname)  ### Alfa
         xbmcvfs.copy(dest, dest_alfa, silent=True)  ### Alfa
     return True
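
A minimal standard-library sketch of the same fetch/extract/cleanup step (urllib and zipfile stand in for the plugin's HTTP helper and the XBMC.Extract builtin; Python 2, matching the snippet above):

    import os
    import urllib
    import zipfile

    def fetch_and_extract(url, dest_dir, libname):
        # download <libname>.zip, as HTTP().fetch(download=...) does above
        zip_path = os.path.join(dest_dir, libname + '.zip')
        urllib.urlretrieve(url, zip_path)
        # unpack in place, as the XBMC.Extract builtin does above
        archive = zipfile.ZipFile(zip_path, 'r')
        archive.extractall(dest_dir)
        archive.close()
        # drop the archive, as xbmcvfs.delete does above
        os.remove(zip_path)
        return os.path.join(dest_dir, libname)
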
Example No. 2
 def __init__(self):
     self.api_key = '1D62F2F90030C444'
     
     self.cache = Cache('tvdb.db', 1.0)
     
     self.http = HTTP()
     self.headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
         'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
         'Cache-Control': 'no-cache',
         'Referer': 'http://www.thetvdb.com/'
     }
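
This constructor only prepares the client; judging from the fetch() calls in Example No. 24 below, the headers dict is passed explicitly on each request. A hypothetical use:

    tvdb = TvDb()
    response = tvdb.http.fetch('http://www.thetvdb.com/api/GetSeries.php?seriesname=Lost',
                               headers=tvdb.headers)
    if not response.error:
        print(response.body)
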
Example No. 3
    def __init__(self):
        self.cache = Cache('kinopoisk.db', 1.0)
        self.html = Clear()

        self.http = HTTP()
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }
Example No. 4
    def __init__(self, language='en'):
        self.api_key = '33DBB309BB2B0ADB'
        dbname = 'tvdb.%s.db' % language
        self.cache = Cache(dbname, 1.0)

        self.language = language

        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.thetvdb.com/'
        }
Example No. 5
 def __init__(self, root_url):
     self.target_url = root_url
     server = urlparse.urlparse(root_url).netloc
     self.http_engine = HTTP.HTTP(server)
     self.myls = lswww.lswww(root_url, http_engine=self.http_engine)
     self.xmlRepGenParser = ReportGeneratorsXMLParser()
     self.xmlRepGenParser.parse(
         os.path.join(CONF_DIR, "config/reports/generators.xml"))
Example No. 6
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform

    def check_update(self):
        need_update = False
        if __settings__.getSetting('plugin_name') != __plugin__:
            __settings__.setSetting('plugin_name', __plugin__)
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.dest_path,
                                             libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                size_old = open(self.sizepath, "r").read()
                if size_old != size:
                    need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform, libname)
            try:
                self.http = HTTP()
                self.http.fetch(url, download=dest + ".zip", progress=True)
                log("%s -> %s" % (url, dest))
                xbmc.executebuiltin(
                    'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path),
                    True)
                xbmcvfs.delete(dest + ".zip")
            except:
                text = 'Failed download %s!' % libname
                xbmc.executebuiltin("XBMC.Notification(%s,%s,%s)" %
                                    (__plugin__, text, 750))
        return True
Example No. 7
    def download(self):
        dirname = os.path.dirname(self.filename)
        zipname = os.path.basename(self.filename).replace('.db', '') + '.zip'
        url = 'http://www.tat-store.ru/torrenter/' + zipname
        self.http = HTTP()
        response = self.http.fetch(url,
                                   download=os.path.join(dirname, zipname),
                                   progress=True)
        if response.error:
            return False

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, zipname), 'r')
            filezip.extractall(dirname)
            filezip.close()
        except:
            return False

        return True
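
Note the error contract used here and throughout these examples: fetch() does not raise on failure but returns a response object with an .error flag (and, for non-download requests, a .body payload), so callers branch instead of catching exceptions. A hedged sketch of the calling side:

    response = HTTP().fetch(url, download=local_path, progress=False)
    if response.error:
        log('download failed: %s' % url)
    else:
        log('saved to %s' % local_path)
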
Example No. 8
 def download(self):
     xbmcvfs.mkdirs(self.dest_path)
     for libname in get_libname(self.platform):
         dest = os.path.join(self.dest_path, libname)
         log("try to fetch %s" % libname)
         url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'], self.platform['version'], libname)
         if libname != 'liblibtorrent.so':
             try:
                 self.http = HTTP()
                 self.http.fetch(url, download=dest + ".zip", progress=True)
                 log("%s -> %s" % (url, dest))
                 xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                 xbmcvfs.delete(dest + ".zip")
             except:
                 text = 'Failed download %s!' % libname
                 xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__,text,750,__icon__))
         else:
             xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest)
     return True
Example No. 9
 def download(self):
     xbmcvfs.mkdirs(self.dest_path)
     for libname in get_libname(self.platform):
         dest = os.path.join(self.dest_path, libname)
         log("try to fetch %s" % libname)
         url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform, libname)
         try:
             self.http = HTTP()
             self.http.fetch(url, download=dest + ".zip", progress=True)
             log("%s -> %s" % (url, dest))
             xbmc.executebuiltin(
                 'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path),
                 True)
             xbmcvfs.delete(dest + ".zip")
         except:
             text = 'Failed download %s!' % libname
             xbmc.executebuiltin("XBMC.Notification(%s,%s,%s)" %
                                 (__plugin__, text, 750))
     return True
Example No. 10
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform

    def check_update(self):
        need_update = False
        if __settings__.getSetting('plugin_name') != __plugin__:
            __settings__.setSetting('plugin_name', __plugin__)
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.dest_path, libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                size_old = open(self.sizepath, "r").read()
                if size_old != size:
                    need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform, libname)
            try:
                self.http = HTTP()
                self.http.fetch(url, download=dest + ".zip", progress=True)
                log("%s -> %s" % (url, dest))
                xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                xbmcvfs.delete(dest + ".zip")
            except:
                text = 'Failed download %s!' % libname
                xbmc.executebuiltin("XBMC.Notification(%s,%s,%s)" % (__plugin__,text,750))
        return True
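
A hypothetical driver for this class, assuming the plugin's get_platform() helper and a writable library directory:

    manager = LibraryManager(xbmc.translatePath('special://temp/libs'), get_platform())
    manager.update()  # re-downloads the libraries only when check_update() reports a change
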
Example No. 11
    def __init__(self):
        self.cache = Cache('kinopoisk.db', 1.0)
        self.html = Clear()

        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }
Example No. 12
 def __init__(self):
     self.api_key = '33DBB309BB2B0ADB'
     
     self.cache = Cache('tvdb.db', 1.0)
     
     self.http = HTTP()
     self.headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
         'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
         'Cache-Control': 'no-cache',
         'Referer': 'http://www.thetvdb.com/'
     }
Example No. 13
    def attackGET(self, http_res):
        page = http_res.path
        resp_headers = http_res.headers
        referer = http_res.referer
        headers = {}
        if referer:
            headers["referer"] = referer

        url = page
        if url not in self.attackedGET:
            if self.verbose == 2:
                print(u"+ {0}".format(url))

            err1 = self.__returnErrorByCode(resp_headers["status_code"])

            if err1 != "ok":
                data1 = self.HTTP.send(url, headers=headers).getPage()
                # .htaccess protection detected
                if self.verbose >= 1:
                    self.log(_("HtAccess protection found: {0}"), url)

                evil_req = HTTP.HTTPResource(url, method="ABC")
                data2, code2 = self.HTTP.send(evil_req, headers=headers).getPageCode()
                err2 = self.__returnErrorByCode(code2)

                if err2 == "ok":
                    # .htaccess bypass success

                    if self.verbose >= 1:
                        self.logC(_("|HTTP Code: {0} : {1}"), resp_headers["status_code"], err1)

                    if self.verbose == 2:
                        self.logY(_("Source code:"))
                        self.logW(data1)

                    # report xml generator (ROMULUS) not implemented for htaccess
                    self.logVuln(category=Vulnerability.HTACCESS,
                                 level=Vulnerability.HIGH_LEVEL,
                                 request=evil_req,
                                 info=_("{0} HtAccess").format(err1))
                    self.logR(_("  .htaccess bypass vulnerability: {0}"), evil_req.url)

                    # print output information by verbosity option
                    if self.verbose >= 1:
                        self.logC(_("|HTTP Code: {0}"), code2)

                    if self.verbose == 2:
                        self.logY(_("Source code:"))
                        self.logW(data2)

                self.attackedGET.append(url)
Example No. 14
class DownloaderClass():
    def __init__(self, dest_path):
        self.dest_path = dest_path
        self.platform = get_platform()
        tempdir(self.dest_path)

    def tools_download(self):
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'], libname)
            if libname != 'liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url, download=dest + ".zip", progress=True)
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__,text,750,__icon__))
            else:
                xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest)
        return True
Example No. 15
 def download(self):
     xbmcvfs.mkdirs(self.dest_path)
     for libname in get_libname(self.platform):
         dest = os.path.join(self.dest_path, libname)
         log("try to fetch %s" % libname)
         url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform, libname)
         try:
             self.http = HTTP()
             self.http.fetch(url, download=dest + ".zip", progress=True)
             log("%s -> %s" % (url, dest))
             xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
             xbmcvfs.delete(dest + ".zip")
         except:
             text = 'Failed download %s!' % libname
             xbmc.executebuiltin("XBMC.Notification(%s,%s,%s)" % (__plugin__,text,750))
     return True
Example No. 16
    def download(self):
        dirname = os.path.dirname(self.filename)
        zipname = os.path.basename(self.filename).replace('.db', '') + '.zip'
        url = 'http://www.tat-store.ru/torrenter/' + zipname
        self.http = HTTP()
        response = self.http.fetch(url, download=os.path.join(dirname, zipname), progress=True)
        if response.error:
            return False

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, zipname), 'r')
            filezip.extractall(dirname)
            filezip.close()
        except:
            return False

        return True
Example No. 17
    def __end_element(self, name):
        if name == self.RESOURCE:
            http_res = HTTP.HTTPResource(self.path,
                                         method=self.method,
                                         encoding=self.encoding,
                                         referer=self.referer,
                                         get_params=self.get_params,
                                         post_params=self.post_params,
                                         file_params=self.file_params)
            http_res.setHeaders(self.headers)

            if self.array is self.toBrowse:
                self.toBrowse.append(http_res)
            else:
                if self.method == "GET":
                    self.browsed.append(http_res)
                elif self.method == "POST":
                    self.forms.append(http_res)
Example No. 18
 def download(self):
     xbmcvfs.mkdirs(self.dest_path)
     for libname in get_libname(self.platform):
         dest = os.path.join(self.dest_path, libname)
         log("try to fetch %s" % libname)
         url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform["system"], self.platform["version"], libname)
         if libname != "liblibtorrent.so":
             try:
                 self.http = HTTP()
                 self.http.fetch(url, download=dest + ".zip", progress=True)
                 log("%s -> %s" % (url, dest))
                 xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                 xbmcvfs.delete(dest + ".zip")
             except:
                 text = "Failed download %s!" % libname
                 xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__, text, 750, __icon__))
         else:
             xbmcvfs.copy(os.path.join(self.dest_path, "libtorrent.so"), dest)
     return True
Example No. 19
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform
        self.root = os.path.dirname(__file__)

    def check_exist(self):
        for libname in get_libname(self.platform):
            if not xbmcvfs.exists(os.path.join(self.dest_path, libname)):
                return False
        return True

    def check_update(self):
        need_update = False
        for libname in get_libname(self.platform):
            if libname != 'liblibtorrent.so':
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.root, self.platform['system'], self.platform['version'], libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                size_old = open(self.sizepath, "r").read()
                if size_old != size:
                    need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'], self.platform['version'], libname)
            if libname != 'liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url, download=dest + ".zip", progress=True)
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__,text,750,__icon__))
            else:
                xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest)
        return True

    def android_workaround(self, new_dest_path):
        for libname in get_libname(self.platform):
            libpath = os.path.join(self.dest_path, libname)
            size = str(os.path.getsize(libpath))
            new_libpath = os.path.join(new_dest_path, libname)

            if not xbmcvfs.exists(new_libpath):
                xbmcvfs.copy(libpath, new_libpath)
                log('Copied %s -> %s' % (libpath, new_libpath))
            else:
                new_size = str(os.path.getsize(new_libpath))
                if size != new_size:
                    xbmcvfs.delete(new_libpath)
                    xbmcvfs.copy(libpath, new_libpath)
                    log('Deleted and copied (%s) %s -> (%s) %s' % (size, libpath, new_size, new_libpath))
        return new_dest_path
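
A hypothetical first-run flow combining the three checks this manager exposes (the Android target path is an assumption; the plugin would supply its own):

    manager = LibraryManager(dest_path, platform)
    if not manager.check_exist():
        manager.download()  # first install: nothing on disk yet
    else:
        manager.update()    # refresh only when the .size.txt records differ
    if platform['system'] == 'android':
        manager.android_workaround(android_dest_path)  # hypothetical target path
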
Example No. 20
    def attackPOST(self, form):
        """This method performs the file handling attack with method POST"""

        # copies
        get_params = form.get_params
        post_params = form.post_params
        file_params = form.file_params
        referer = form.referer

        err = ""
        for params_list in [get_params, post_params, file_params]:
            for i in xrange(len(params_list)):
                timeouted = False
                warn = 0
                inc = 0
                err500 = 0

                saved_value = params_list[i][1]
                param_name = self.HTTP.quote(params_list[i][0])

                if params_list is file_params:
                    params_list[i][1] = ["_FILE__", params_list[i][1][1]]
                else:
                    params_list[i][1] = "__FILE__"

                attack_pattern = HTTP.HTTPResource(form.path,
                                                   method=form.method,
                                                   get_params=get_params,
                                                   post_params=post_params,
                                                   file_params=file_params)
                if attack_pattern not in self.attackedPOST:
                    self.attackedPOST.append(attack_pattern)
                    for payload in self.payloads:
                        payload = payload.replace('[FILE_NAME]',
                                                  form.file_name)

                        if params_list is file_params:
                            payload = payload.replace('[VALUE]',
                                                      saved_value[0])
                            payload = payload.replace(
                                '[DIRVALUE]', saved_value[0].rsplit('/', 1)[0])
                            params_list[i][1][0] = payload
                        else:
                            payload = payload.replace('[VALUE]', saved_value)
                            payload = payload.replace(
                                '[DIRVALUE]',
                                saved_value.rsplit('/', 1)[0])
                            params_list[i][1] = payload
                        evil_req = HTTP.HTTPResource(form.path,
                                                     method=form.method,
                                                     get_params=get_params,
                                                     post_params=post_params,
                                                     file_params=file_params,
                                                     referer=referer)
                        if self.verbose == 2:
                            print(u"+ {0}".format(evil_req))
                        try:
                            data, code = self.HTTP.send(evil_req).getPageCode()
                        except requests.exceptions.Timeout:
                            if timeouted:
                                continue
                            data = ""
                            code = "408"
                            self.logAnom(category=Anomaly.RES_CONSUMPTION,
                                         level=Anomaly.MEDIUM_LEVEL,
                                         request=evil_req,
                                         parameter=param_name,
                                         info=Anomaly.MSG_PARAM_TIMEOUT.format(
                                             param_name))
                            self.logO(Anomaly.MSG_TIMEOUT, evil_req.path)
                            self.logO(Anomaly.MSG_EVIL_REQUEST)
                            self.logC(evil_req.http_repr)
                            print('')
                            timeouted = True
                        else:
                            err, inc, warn = self.__findPatternInResponse(
                                data, warn)
                        if err != "":
                            info_msg = _(
                                "{0} via injection in the parameter {1}")
                            self.logVuln(category=Vulnerability.FILE_HANDLING,
                                         level=Vulnerability.HIGH_LEVEL,
                                         request=evil_req,
                                         parameter=param_name,
                                         info=info_msg.format(err, param_name))
                            self.logR(Vulnerability.MSG_PARAM_INJECT, err,
                                      evil_req.url, param_name)
                            self.logR(Vulnerability.MSG_EVIL_REQUEST)
                            self.logC(evil_req.http_repr)
                            print('')
                            if inc:
                                break

                        else:
                            if code == "500" and err500 == 0:
                                err500 = 1
                                self.logAnom(category=Anomaly.ERROR_500,
                                             level=Anomaly.HIGH_LEVEL,
                                             request=evil_req,
                                             parameter=param_name,
                                             info=Anomaly.MSG_PARAM_500.format(
                                                 param_name))
                                self.logO(Anomaly.MSG_500, evil_req.url)
                                self.logO(Anomaly.MSG_EVIL_REQUEST)
                                self.logC(evil_req.http_repr)
                                print('')
                params_list[i][1] = saved_value
Example No. 21
    def attackPOST(self, form):
        """This method performs the SQL Injection attack with method POST"""
        payload = "\xbf'\"("
        filename_payload = "'\"("
        err = ""

        # copies
        get_params = form.get_params
        post_params = form.post_params
        file_params = form.file_params
        referer = form.referer

        for params_list in [get_params, post_params, file_params]:
            for i in xrange(len(params_list)):
                saved_value = params_list[i][1]

                if params_list is file_params:
                    params_list[i][1] = ["_SQL__", params_list[i][1][1]]
                else:
                    params_list[i][1] = "__SQL__"

                param_name = self.HTTP.quote(params_list[i][0])
                attack_pattern = HTTP.HTTPResource(form.path,
                                                   method=form.method,
                                                   get_params=get_params,
                                                   post_params=post_params,
                                                   file_params=file_params)
                if attack_pattern not in self.attackedPOST:
                    self.attackedPOST.append(attack_pattern)

                    if params_list is file_params:
                        params_list[i][1][0] = filename_payload
                    else:
                        params_list[i][1] = payload

                    evil_req = HTTP.HTTPResource(form.path,
                                                 method=form.method,
                                                 get_params=get_params,
                                                 post_params=post_params,
                                                 file_params=file_params,
                                                 referer=referer)
                    if self.verbose == 2:
                        print(u"+ {0}".format(evil_req))

                    try:
                        resp = self.HTTP.send(evil_req)
                        data, code = resp.getPageCode()
                    except requests.exceptions.Timeout, timeout:
                        # No timeout report here... launch blind sql detection later
                        data = ""
                        code = "408"
                        resp = timeout
                    else:
                        err = self.__findPatternInResponse(data)
                    if err != "":
                        self.logVuln(
                            category=Vulnerability.SQL_INJECTION,
                            level=Vulnerability.HIGH_LEVEL,
                            request=evil_req,
                            parameter=param_name,
                            info=_("{0} via injection in the parameter {1}"
                                   ).format(err, param_name))
                        self.logR(Vulnerability.MSG_PARAM_INJECT, err,
                                  evil_req.url, param_name)
                        self.logR(Vulnerability.MSG_EVIL_REQUEST)
                        self.logC(evil_req.http_repr)
                        print('')
                        self.vulnerablePOST.append(attack_pattern)

                    else:
                        if code == "500":
                            self.logAnom(
                                category=Anomaly.ERROR_500,
                                level=Anomaly.HIGH_LEVEL,
                                request=evil_req,
                                parameter=param_name,
                                info=Anomaly.MSG_PARAM_500.format(param_name))
                            self.logO(Anomaly.MSG_500, evil_req.url)
                            self.logO(Anomaly.MSG_EVIL_REQUEST)
                            self.logC(evil_req.http_repr)
                            print('')

                params_list[i][1] = saved_value
Example No. 22
    def attackGET(self, http_res):
        """This method performs the SQL Injection attack with method GET"""
        page = http_res.path
        params_list = http_res.get_params
        resp_headers = http_res.headers
        referer = http_res.referer
        headers = {}
        if referer:
            headers["referer"] = referer

        # about this payload: http://shiflett.org/blog/2006/jan/addslashes-versus-mysql-real-escape-string
        payload = "\xBF'\"("
        vuln_found = 0

        if not params_list:
            # Do not attack application-type files
            if not "content-type" in resp_headers:
                # Sometimes there's no content-type... so we rely on the document extension
                if (page.split(".")[-1]
                        not in self.allowed) and page[-1] != "/":
                    return
            elif not "text" in resp_headers["content-type"]:
                return

            err = ""
            payload = self.HTTP.quote(payload)
            url = page + "?" + payload
            if url not in self.attackedGET:
                self.attackedGET.append(url)
                evil_req = HTTP.HTTPResource(url)

                if self.verbose == 2:
                    print(u"+ {0}".format(url))
                try:
                    resp = self.HTTP.send(evil_req, headers=headers)
                    data, code = resp.getPageCode()
                except requests.exceptions.Timeout, timeout:
                    # No timeout report here... launch blind sql detection later
                    data = ""
                    code = "408"
                    err = ""
                    resp = timeout
                else:
                    err = self.__findPatternInResponse(data)
                if err != "":
                    vuln_found += 1
                    self.logVuln(
                        category=Vulnerability.SQL_INJECTION,
                        level=Vulnerability.HIGH_LEVEL,
                        request=evil_req,
                        info=_("{0} via injection in the query string").format(
                            err))
                    self.logR(Vulnerability.MSG_QS_INJECT, err, page)
                    self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url)

                    self.vulnerableGET.append(page + "?" + "__SQL__")

                else:
                    if code == "500":
                        self.logAnom(category=Anomaly.ERROR_500,
                                     level=Anomaly.HIGH_LEVEL,
                                     request=evil_req,
                                     info=Anomaly.MSG_QS_500)
                        self.logO(Anomaly.MSG_500, page)
                        self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)
Example No. 23
class Cache:
    def __init__(self, name, version, expire=0, size=0, step=100):
        self.name = name
        self.version = version
        self._connect()
        if expire:
            self.expire(expire)
        if size:
            self.size(size, step)

    def get(self, token, callback, *param):
        cur = self.db.cursor()
        cur.execute('select expire,data from cache where id=? limit 1', (token,))
        row = cur.fetchone()
        cur.close()

        if row:
            if row[0] and row[0] < int(time.time()):
                pass
            else:
                try:
                    obj = pickle.loads(row[1])
                except:
                    pass
                else:
                    return obj

        response = callback(*param)

        if response[0]:
            obj = sqlite.Binary(pickle.dumps(response[1]))
            curtime = int(time.time())
            cur = self.db.cursor()
            if isinstance(response[0], bool):
                cur.execute('replace into cache(id,addtime,expire,data) values(?,?,?,?)', (token, curtime, None, obj))
            else:
                cur.execute('replace into cache(id,addtime,expire,data) values(?,?,?,?)',
                            (token, curtime, curtime + response[0], obj))
            self.db.commit()
            cur.close()

        return response[1]

    def expire(self, expire):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache where addtime<?', (int(time.time()) - expire,))
        self.db.commit()
        cur.close()

    def size(self, size, step=100):
        # with rtrCache_lock:
        while True:
            if os.path.getsize(self.filename) < size:
                break
            cur = self.db.cursor()
            cur.execute('select id from cache order by addtime asc limit ?', (step,))
            rows = cur.fetchall()
            if not rows:
                cur.close()
                break
            cur.execute('delete from cache where id in (' + ','.join(len(rows) * '?') + ')', [x[0] for x in rows])
            self.db.commit()
            cur.close()

    def flush(self):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache')
        self.db.commit()
        cur.close()

    def _connect(self):
        with rtrCache_lock:
            dirname = xbmc.translatePath('special://temp')
            for subdir in ('xbmcup', 'plugin.video.torrenter'):
                dirname = os.path.join(dirname, subdir)
                if not xbmcvfs.exists(dirname):
                    xbmcvfs.mkdir(dirname)

            self.filename = os.path.join(dirname, self.name)

            first = False
            if not xbmcvfs.exists(self.filename):
                first = True

            self.db = sqlite.connect(self.filename, check_same_thread=False)
            if not first:
                cur = self.db.cursor()
                try:
                    cur.execute('select version from db_ver')
                    row = cur.fetchone()
                    if not row or float(row[0]) != self.version:
                        cur.execute('drop table cache')
                        cur.execute('drop table if exists db_ver')
                        first = True
                except:
                    cur.execute('drop table cache')
                    first = True
                self.db.commit()
                cur.close()

            if first and not self.first_time():
                cur = self.db.cursor()
                cur.execute('pragma auto_vacuum=1')
                cur.execute('create table cache(id varchar(255) unique, addtime integer, expire integer, data blob)')
                cur.execute('create index time on cache(addtime asc)')
                cur.execute('create table db_ver(version real)')
                cur.execute('insert into db_ver(version) values(?)', (self.version,))
                self.db.commit()
                cur.close()

    def first_time(self):
        scrapers = {'tvdb': 'TheTVDB.com', 'tmdb': 'TheMovieDB.org', 'kinopoisk': 'KinoPoisk.ru'}
        ok = xbmcgui.Dialog().yesno(Localization.localize('Content Lists'),
                                    Localization.localize('Do you want to preload full metadata?') + ' (%s)' % (
                                        scrapers[os.path.basename(self.filename).split('.')[0]]),
                                    Localization.localize('It is highly recommended!'))
        if ok:
            return self.download()
        else:
            return False

    def download(self):
        dirname = os.path.dirname(self.filename)
        zipname = os.path.basename(self.filename).replace('.db', '') + '.zip'
        url = 'http://www.tat-store.ru/torrenter/' + zipname
        self.http = HTTP()
        response = self.http.fetch(url, download=os.path.join(dirname, zipname), progress=True)
        if response.error:
            return False

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, zipname), 'r')
            filezip.extractall(dirname)
            filezip.close()
        except:
            return False

        return True
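
The callback contract for Cache.get() follows from the replace-into-cache branch above: the callback returns a (ttl, data) pair, where True caches forever, an integer caches for that many seconds, and a falsy first element skips caching. A hypothetical callback (the db name reuses 'tvdb.db' so first_time() finds its scraper label):

    def fetch_profile(movie_id):
        data = {'title': 'Example'}  # stand-in for the expensive lookup
        return 24 * 60 * 60, data    # cache for one day

    cache = Cache('tvdb.db', 1.0)
    profile = cache.get('movie:123', fetch_profile, '123')
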
Example No. 24
class TvDb:
    """
    
    API:
        scraper  - scraper entry point
        search   - series search
        movie    - movie profile
        
    """
    
    def __init__(self):
        self.api_key = '1D62F2F90030C444'
        
        self.cache = Cache('tvdb.db', 1.0)
        
        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.thetvdb.com/'
        }
        
        
    # API
    
    def scraper(self, search, year=None):
        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8'))
        except:
            return None
        else:
            
            if year:
                tag += ':' + str(year)
            
            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None
            
            return self.movie(id)
        
    def search(self, name):
        return self._search(name)
    
    
    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)
    
    
    def _movie(self, id):
        dirname = tempfile.mkdtemp()
        response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip'))
        if response.error:
            self._movie_clear(dirname)
            return False, None
        
        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            filezip.extractall(dirname)
            filezip.close()
            movie = file(os.path.join(dirname, 'ru.xml'), 'rb').read().decode('utf8')
        except:
            self._movie_clear(dirname)
            return False, None
        
        self._movie_clear(dirname)
        
        body = re.compile(r'<Series>(.+?)</Series>', re.U|re.S).search(movie)
        if not body:
            return False, None
        
        body = body.group(1)
        
        res = {
            'icon' : None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count' : int(id)
            }
        }
        
        # directors and writers
        for tag in ('Director', 'Writer'):
            people = {}
            people_list = []
            for x in re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U | re.S).findall(movie):
                people_list.extend(x.split('|'))
            for name in (x.strip() for x in people_list):
                if name:
                    people[name] = 1
            if people:
                res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x])
        
        for tag, retag, typeof, targettype in (
                    ('plot', 'Overview', None, None),
                    ('mpaa', 'ContentRating', None, None),
                    ('premiered', 'FirstAired', None, None),
                    ('studio', 'Network', None, None),
                    ('title', 'SeriesName', None, None),
                    ('runtime', 'Runtime', None, None),
                    ('votes', 'RatingCount', None, None),
                    ('rating', 'Rating', float, None),
                    ('genre', 'Genre', list, unicode),
                    ('cast', 'Actors', list, None)
                    ):
            r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U|re.S).search(body)
            if r:
                r = r.group(1).strip()
                if typeof == float:
                    res['info'][tag] = float(r)
                elif typeof == list:
                    if targettype == unicode:
                        res['info'][tag] = u', '.join([x for x in [x.strip() for x in r.split(u'|')] if x])
                    else:
                        res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x]
                else:
                    res['info'][tag] = r
        
        # year
        if 'premiered' in res['info']:
            res['info']['year'] = int(res['info']['premiered'].split('-')[0])
        
        # poster
        r = re.compile(r'<poster>([^<]+)</poster>', re.U|re.S).search(body)
        if r:
            res['icon'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
            res['thumbnail'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        # fanart
        r = re.compile(r'<fanart>([^<]+)</fanart>', re.U|re.S).search(body)
        if r:
            res['properties']['fanart_image'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        timeout = True
        # if the movie is recent, do NOT cache it for long (the site may still get updates)
        if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week
        
        return timeout, res
            
    
    def _movie_clear(self, dirname):
        for filename in os.listdir(dirname):
            try:
                os.unlink(os.path.join(dirname, filename))
            except:
                raise
        try:
            os.rmdir(dirname)
        except:
            raise
        
    
    def _search(self, search):
        for name in search:
            response = self.http.fetch('http://www.thetvdb.com/api/GetSeries.php?language=ru&seriesname=' + urllib.quote_plus(name.encode('utf8')), headers=self.headers)
            if response.error:
                return None
        
            res = []
            rows = re.compile('<Series>(.+?)</Series>', re.U|re.S).findall(response.body.decode('utf8'))
            if rows:
                recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U|re.S)
            
                for row in [x for x in rows if x.find(u'<language>ru</language>') != -1]:
                    r = recmd.search(row)
                    if r:
                        res.append(int(r.group(1)))
                # in some cases a series can only be found by its original title,
                # even though a Russian description exists
                if not res:
                    for row in [x for x in rows if x.find(u'<language>en</language>') != -1]:
                        r = recmd.search(row)
                        if r:
                            res.append(int(r.group(1)))

            if res:
                break
                
        return {'pages': (1, 0, 1, 0), 'data': res}
    
    
    def _scraper(self, name, year):
        timeout = True
        
        # if the movie is recent, do NOT cache it for long (the site may still get updates)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week
        
        ids = self._search(name)
        
        if ids is None:
            return False, None
        
        elif not ids['data']:
            # cache the empty result for three days
            return 259200, None
        
        else:
            return timeout, ids['data'][0]
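
A hypothetical lookup using this scraper (inside XBMC, since the Cache constructor may prompt to preload the database):

    tvdb = TvDb()
    profile = tvdb.scraper([u'Lost'])
    if profile:
        print(profile['info']['title'])  # served from Cache.get on later calls
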
Example No. 25
class KinoPoisk:
    """
    
    API:
        scraper  - scraper entry point
        movie    - movie profile
        search   - movie search
        best     - search for the best movies
        person   - person search
        work     - information about a person's works
        
    """
    def __init__(self, language='ru'):
        dbname = 'kinopoisk.%s.db' % language
        self.cache = Cache(dbname, 1.0)
        self.html = Clear()

        self.timeout = 60.0

        self.http = HTTP()
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }

    # API

    def scraper(self, search, year=None):

        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(
                ":".join(search).encode('utf8'))
        except:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id)

    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)

    def search(self, search, year):
        return self._search_movie(search, year)

    def countries(self):
        return COUNTRIES

    def country(self, id, default=None):
        country = [x[1] for x in COUNTRIES if x[0] == id]
        return country[0] if country else default

    def _search_movie(self, search, year=None):
        parser = kinopoisk.pageparser.PageParser(kinopoisk.LOGGER,
                                                 isDebug=True)
        orginalname = search[0]
        if len(search) > 1:
            name = search[1]
        else:
            name = None
        results = parser.fetchAndParseSearchResults(orginalname, year, name)
        if results and results[0][3] > 70:
            return results[0][0]

    def _scraper(self, search, year):
        timeout = True

        # if the movie is recent, do NOT cache it for long (the site may still get updates)
        if year and year > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        movie_id = self._search_movie(search, year)

        if movie_id is None:
            # cache the empty result for 4 weeks
            return 7 * 24 * 60 * 60 * 4, None

        else:
            return timeout, movie_id

    def _movie(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/',
                                   headers=self.headers,
                                   timeout=self.timeout)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'icon': None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count': int(id)
            }
        }

        # title, original title, tagline, MPAA rating, year, top250
        # runtime - movie duration (kept in a separate field, otherwise the file size is not shown)
        for tag, reg, cb in (
            ('title', '<title>(.+?)</title>', self.html.string),
            ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>',
             self.html.string),
            ('tagline',
             '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>',
             self.html.string), ('mpaa', 'images/mpaa/([^\.]+).gif',
                                 self.html.string),
            ('runtime',
             '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>',
             self.html.string),
            ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"',
             int), ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', int)):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = cb(value)

        # directors, writers, genres
        for tag, reg in (('director', u'<td itemprop="director">(.+?)</td>'), (
                'writer',
                u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
                         ('genre', u'<span itemprop="genre">(.+?)</span>')):
            r = re.compile(reg, re.U | re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>',
                                    re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)

        # actors
        r = re.compile(u'<h4>В главных ролях:</h4>(.+?)</ul>',
                       re.U | re.S).search(html)
        if r:
            actors = []
            for r in re.compile(
                    '<li itemprop="actors"><a [^>]+>([^<]+)</a></li>',
                    re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]
                # res['info']['castandrole'] = actors[:]

        # movie description
        r = re.compile(
            '<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>',
            re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot

        # IMDB
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>',
                       re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()

        # premiere date
        r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>',
                       re.U | re.S).search(html)
        if r:
            r = re.compile(u'data\-ical\-date="([^"]+)"',
                           re.U | re.S).search(r.group(1))
            if r:
                data = r.group(1).split(' ')
                if len(data) == 3:
                    i = 0
                    for mon in (u'января', u'февраля', u'марта', u'апреля',
                                u'мая', u'июня', u'июля', u'августа',
                                u'сентября', u'октября', u'ноября',
                                u'декабря'):
                        i += 1
                        if mon == data[1]:
                            mon = str(i)
                            if len(mon) == 1:
                                mon = '0' + mon
                            day = data[0]
                            if len(day) == 1:
                                day = '0' + day
                            res['info']['premiered'] = '-'.join(
                                [data[2], mon, day])
                            break

        # poster
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)',
                       re.U | re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                res['thumbnail'] = res['icon'] = 'http://kinopoisk.ru' + poster

        menu = re.compile(
            '<ul id="newMenuSub" class="clearfix(.+?)<!\-\- /menu \-\->',
            re.U | re.S).search(html)
        if menu:
            menu = menu.group(1)

            # fanart
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/wall/',
                                           headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/w_size/([0-9]+)/">',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1),
                                    (id2, size2): cmp(int(size1), int(size2)))

                        # try to pick the best-fitting size
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/',
                            headers=self.headers,
                            timeout=self.timeout)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(
                                    1).strip()

            # if there is no fanart (wallpapers), try movie stills instead
            if not res['properties']['fanart_image'] and menu.find(
                    '/film/' + id + '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/stills/',
                                           headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1, t1), (
                            id2, size2, t2): cmp(int(size1), int(size2)))

                        # try to pick the best-fitting size
                        fanart_best = [
                            x for x in fanart
                            if int(x[1]) <= 1280 and int(x[1]) > int(x[2])
                        ]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/',
                            headers=self.headers,
                            timeout=self.timeout)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(
                                    1).strip()

            # studios
            if menu.find('/film/' + id + '/studio/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/studio/',
                                           headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    r = re.compile(u'<b>Производство:</b>(.+?)</table>',
                                   re.U | re.S).search(html)
                    if r:
                        studio = []
                        for r in re.compile(
                                '<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>',
                                re.U).findall(r.group(1)):
                            r = self.html.string(r)
                            if r:
                                studio.append(r)
                        if studio:
                            res['info']['studio'] = u', '.join(studio)

        timeout = True
        # if the movie is recent, cache it only briefly (the site may still be updated)
        if 'year' not in res['info'] or not res['properties']['fanart_image'] \
                or int(res['info']['year']) > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        return timeout, res
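
Note: the fanart.sort(cmp=lambda (id1, size1), (id2, size2): ...) calls above rely on Python 2-only tuple-parameter lambdas and the removed cmp= argument. A minimal sketch of the same "largest wallpaper no wider than 1280px" selection written for Python 3 with key= (the sample data is invented):

# (picture id, width) pairs as scraped from the wall page -- sample data only
fanart = [('101', '1920'), ('102', '1024'), ('103', '1280')]

fanart.sort(key=lambda item: int(item[1]))              # ascending by width
fanart_best = [x for x in fanart if int(x[1]) <= 1280]  # prefer widths <= 1280
if fanart_best:
    fanart = fanart_best
picture_id, width = fanart[-1]                          # -> ('103', '1280')
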
Exemplo n.º 26
0
                                # No more payload injection
                                break

        # URL contains parameters
        else:
            for i in xrange(len(params_list)):
                saved_value = params_list[i][1]
                param_name = self.HTTP.quote(params_list[i][0])
                params_list[i][1] = "__XSS__"
                url = page + "?" + self.HTTP.encode(params_list)

                if url not in self.attackedGET:
                    self.attackedGET.append(url)
                    code = self.random_string()
                    params_list[i][1] = code
                    test_url = HTTP.HTTPResource(page + "?" +
                                                 self.HTTP.encode(params_list))
                    self.GET_XSS[code] = (test_url, param_name)
                    try:
                        resp = self.HTTP.send(test_url, headers=headers)
                        data = resp.getPage()
                    except requests.exceptions.Timeout, timeout:
                        data = ""
                        resp = timeout
                    # is the random code on the webpage?
                    if code in data:
                        # YES! But where exactly?
                        payloads = self.generate_payloads(data, code)
                        for payload in payloads:

                            params_list[i][1] = payload
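
A minimal sketch (Python 3) of the probe pattern this fragment implements: swap one parameter value for a random marker, rebuild the query string, and later check whether the marker is reflected in the response body. The helper name and token format here are assumptions for illustration, not this module's API:

import random
import string
from urllib.parse import urlencode

def make_probe_url(page, params_list, index):
    """Return (url, code): the URL with params_list[index] set to a random marker."""
    code = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
    probed = [list(p) for p in params_list]
    probed[index][1] = code
    return page + '?' + urlencode(probed), code

url, code = make_probe_url('http://example.com/search', [['q', 'test'], ['page', '1']], 0)
# fetch url, then: if code appears in the response body, the parameter is reflected
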
Exemplo n.º 27
0
class TvDb:
    """
    
    API:
        scraper  - scraper entry point
        search   - series search
        movie    - movie profile
        
    """

    def __init__(self, language='en'):
        self.api_key = '33DBB309BB2B0ADB'
        dbname = 'tvdb.%s.db' % language
        self.cache = Cache(dbname, 1.0)

        self.language = language

        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.thetvdb.com/'
        }


    # API

    def scraper(self, search, year=None):
        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8'))
        except:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id)

    def search(self, search, year=None):
        return self._search(search, year)


    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)


    def _movie(self, id):
        try:
            dirname = tempfile.mkdtemp()
        except:
            dirname = xbmc.translatePath('special://temp')
            for subdir in ('xbmcup', 'plugin.video.torrenter'):
                dirname = os.path.join(dirname, subdir)
                if not os.path.exists(dirname):
                    os.mkdir(dirname)

        url = 'http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/' + self.language + '.zip'
        # print url
        response = self.http.fetch(url, headers=self.headers, download=os.path.join(dirname, 'movie.zip'), timeout=20)
        if response.error:
            print "ERRRRRROR! " + str(response.error)
            self._movie_clear(dirname)
            return False, None

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            filezip.extractall(dirname)
            filezip.close()
            movie = open(os.path.join(dirname, self.language + '.xml'), 'rb').read().decode('utf8')
        except:
            self._movie_clear(dirname)
            return False, None

        self._movie_clear(dirname)

        body = re.compile(r'<Series>(.+?)</Series>', re.U | re.S).search(movie)
        if not body:
            return False, None

        body = body.group(1)

        res = {
            'icon': None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count': int(id)
            }
        }

        # directors and writers
        for tag in ('Director', 'Writer'):
            people = {}
            people_list = []
            [people_list.extend(x.split('|')) for x in
             re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U | re.S).findall(movie)]
            [people.update({x: 1}) for x in [x.strip() for x in people_list] if x]
            if people:
                res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x])

        for tag, retag, typeof, targettype in (
                ('plot', 'Overview', None, None),
                ('mpaa', 'ContentRating', None, None),
                ('premiered', 'FirstAired', None, None),
                ('studio', 'Network', None, None),
                ('title', 'SeriesName', None, None),
                ('runtime', 'Runtime', None, None),
                ('votes', 'RatingCount', None, None),
                ('rating', 'Rating', float, None),
                ('genre', 'Genre', list, unicode),
                ('cast', 'Actors', list, None)
        ):
            r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U | re.S).search(body)
            if r:
                r = r.group(1).strip()
                if typeof == float:
                    res['info'][tag] = float(r)
                elif typeof == list:
                    if targettype == unicode:
                        res['info'][tag] = u', '.join([x for x in [x.strip() for x in r.split(u'|')] if x])
                    else:
                        res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x]
                else:
                    res['info'][tag] = r

        # year
        if 'premiered' in res['info']:
            res['info']['year'] = int(res['info']['premiered'].split('-')[0])

        # poster
        r = re.compile(r'<poster>([^<]+)</poster>', re.U | re.S).search(body)
        if r:
            res['icon'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
            res['thumbnail'] = 'http://thetvdb.com/banners/' + r.group(1).strip()

        # fanart
        r = re.compile(r'<fanart>([^<]+)</fanart>', re.U | re.S).search(body)
        if r:
            res['properties']['fanart_image'] = 'http://thetvdb.com/banners/' + r.group(1).strip()

        timeout = True
        # if the movie is recent, cache it only briefly (the site may still be updated)
        if 'year' not in res['info'] or not res['properties']['fanart_image'] \
                or int(res['info']['year']) > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        return timeout, res


    def _movie_clear(self, dirname):
        for filename in os.listdir(dirname):
            try:
                os.unlink(os.path.join(dirname, filename))
            except:
                raise
        try:
            os.rmdir(dirname)
        except:
            raise


    def _search(self, search, year=None):
        i = -1
        id = None
        for name in search:
            # print urllib.quote_plus(name.encode('utf-8'))
            url = 'http://www.thetvdb.com/api/GetSeries.php?language=' + self.language + '&seriesname=' + urllib.quote_plus(
                name.encode('utf-8'))
            #print url
            i += 1
            response = self.http.fetch(url, headers=self.headers, timeout=20)
            #print response.body
            if response.error:
                #print "ERRRRRROR! "+str(response.error)
                return None

            res = []
            rows = re.compile('<Series>(.+?)</Series>', re.U | re.S).findall(response.body.decode('utf8'))
            if rows:
                recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U | re.S)

                for row in [x for x in rows if x.find(u'<language>%s</language>' % self.language.decode('utf8')) != -1]:
                    r = recmd.search(row)
                    if r:
                        res.append(int(r.group(1)))
                # in some cases the series can only be found by its original title,
                # even though a Russian description exists
                if not res and self.language != 'en':
                    for row in [x for x in rows if x.find(u'<language>en</language>') != -1]:
                        r = recmd.search(row)
                        if r:
                            res.append(int(r.group(1)))

                if len(res) > 1:
                    Data = []
                    for id in res:
                        for row in rows:
                            recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U | re.S)
                            r = recmd.search(row)
                            if int(r.group(1)) == id:
                                title = re.compile('<SeriesName>(.+?)</SeriesName>', re.U | re.S).search(row)
                                Syear = re.compile('<FirstAired>(.+?)</FirstAired>', re.U | re.S).search(row)
                                if not Syear:
                                    Syear = 0
                                else:
                                    Syear = Syear.group(1)
                                Data.append((title.group(1), Syear, id))

                    index = get_best(Data, search, year)
                    if index and index['rate'] > 70:
                        id = str(index['id'])
                elif len(res) == 1:
                    id = str(res[0])

            if id:
                break

        return id


    def _scraper(self, search, year):
        timeout = True

        # if the movie is recent, cache it only briefly (the site may still be updated)
        if year and year > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        id = self._search(search, year)

        if id is None:
            return 7 * 24 * 60 * 60 * 4, None

        else:
            # print str((timeout, ids['data'][0]))
            return timeout, id
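
A hedged usage sketch for the TvDb class above, assuming the plugin environment where Cache, HTTP and get_best are importable; the series title and year are arbitrary examples:

tvdb = TvDb(language='en')
# scraper() resolves the title list to a TheTVDB series id via _search(),
# caches the id, and returns the movie() profile (or None on failure)
meta = tvdb.scraper([u'Breaking Bad'], year=2008)
if meta:
    print(meta['info'].get('title'), meta['info'].get('premiered'))
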
Exemplo n.º 28
0
                                     info=Anomaly.MSG_QS_500)
                        self.logO(Anomaly.MSG_500, page)
                        self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)
        else:
            for i in range(len(params_list)):
                err = ""
                param_name = self.HTTP.quote(params_list[i][0])
                saved_value = params_list[i][1]
                params_list[i][1] = "__SQL__"
                pattern_url = page + "?" + self.HTTP.encode(params_list)
                if pattern_url not in self.attackedGET:
                    self.attackedGET.append(pattern_url)

                    params_list[i][1] = self.HTTP.quote(payload)
                    url = page + "?" + self.HTTP.encode(params_list)
                    evil_req = HTTP.HTTPResource(url)

                    if self.verbose == 2:
                        print(u"+ {0}".format(evil_req.url))
                    try:
                        resp = self.HTTP.send(evil_req, headers=headers)
                        data, code = resp.getPageCode()
                    except requests.exceptions.Timeout, timeout:
                        # No timeout report here... launch blind sql detection later
                        data = ""
                        code = "408"
                        err = ""
                        resp = timeout
                    else:
                        err = self.__findPatternInResponse(data)
                    if err != "":
Exemplo n.º 29
0
class KinoPoisk:
    """
    
    API:
        scraper  - scraper entry point
        movie    - movie profile
        search   - movie search
        best     - top-rated movie search
        person   - person search
        work     - a person's body of work
        
    """
    def __init__(self):
        self.cache = Cache('kinopoisk.db', 1.0)
        self.html = Clear()

        self.http = HTTP()
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }

    # API

    def scraper(self, search, year=None, trailer_quality=None):

        try:
            if isinstance(search, list):
                search = search[0] or ""
            tag = 'scraper:' + urllib.quote_plus(search.encode('windows-1251'))
        except:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id, trailer_quality)

    def movie(self, id, trailer_quality=None):
        id = str(id)

        if trailer_quality is None:
            trailer_quality = 6

        movie = self.cache.get('movie:' + id, self._movie, id)
        if not movie:
            return None

        if 'trailers' in movie and movie['trailers']:
            # build the list of trailers at the quality we need
            video = []
            for m in movie['trailers']:
                url = [x for x in m['video'] if x[0] <= trailer_quality]
                if url:
                    m['video'] = url[-1]
                    video.append(m)

            movie['trailers'] = video

            if movie['trailers']:
                # pick the main trailer
                r = [x for x in movie['trailers'] if x['trailer']]
                if r:
                    movie['info']['trailer'] = r[0]['video'][1]
                else:
                    # if no proper trailer is found, return whatever is available...
                    movie['info']['trailer'] = movie['trailers'][0]['video'][1]

        return movie

    def search(self, name, trailer_quality=None):
        return self._search_movie(name)

    def best(self, **kwarg):
        page = kwarg.get('page', 1)
        limit = kwarg.get('limit', 50)

        url = 'http://www.kinopoisk.ru/top/navigator/m_act%5Bis_film%5D/on/m_act%5Bnum_vote%5D/' + str(
            kwarg.get('votes', 100)) + '/'

        if kwarg.get('dvd'):
            url += 'm_act%5Bis_dvd%5D/on/'

        if kwarg.get('decade'):
            url += 'm_act%5Bdecade%5D/' + str(kwarg['decade']) + '/'

        if kwarg.get('genre'):
            url += 'm_act%5Bgenre%5D/' + str(GENRE[kwarg['genre']]) + '/'

        if kwarg.get('country'):
            url += 'm_act%5Bcountry%5D/' + str(kwarg['country']) + '/'

        if kwarg.get('rate'):
            url += 'm_act%5Brating%5D/' + str(kwarg['rate']) + ':/'

        if kwarg.get('mpaa'):
            url += 'm_act%5Bmpaa%5D/' + str(kwarg['mpaa']) + '/'

        url += 'perpage/' + str(limit) + '/order/ex_rating/'

        if page > 1:
            url += 'page/' + str(page) + '/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = {'pages': (1, 0, 1, 0), 'data': []}

        r = re.compile('<div class="pagesFromTo(.+?)<div class="pagesFromTo',
                       re.U | re.S).search(
                           response.body.decode('windows-1251'))
        if r:

            body = r.group(1)

            # compile pagelist
            p = re.compile('>([0-9]+)&mdash;[0-9]+[^0-9]+?([0-9]+)',
                           re.U).search(body)
            if p:
                page = (int(p.group(1)) - 1) / limit + 1
                total = int(p.group(2))
                pages = total / limit
                if limit * pages != total:
                    pages += 1
                res['pages'] = (pages, 0 if page == 1 else page - 1, page,
                                0 if page == pages else page + 1)
            # end compile

            for id in re.compile('<div id="tr_([0-9]+)"',
                                 re.U | re.S).findall(body):
                res['data'].append(int(id))

        return res

    def person(self, name):
        response = self.http.fetch(
            'http://www.kinopoisk.ru/s/type/people/list/1/find/' +
            urllib.quote_plus(name.encode('windows-1251')) +
            '/order/relevant/',
            headers=self.headers)
        if response.error:
            return None

        res = []
        body = re.compile(
            '<div class="navigator">(.+?)<div class="navigator">',
            re.U | re.S).search(response.body.decode('windows-1251'))
        if body:

            for block in re.compile('<p class="pic">(.+?)<div class="clear">',
                                    re.U | re.S).findall(body.group(1)):

                id, name, original, year, poster = None, None, None, None, None

                r = re.compile(
                    '<p class="name"><a href="http://www\.kinopoisk\.ru/level/4/people/([0-9]+)[^>]+>([^<]+)</a>',
                    re.U | re.S).search(block)
                if r:
                    id = r.group(1)
                    name = r.group(2).strip()

                    if id and name:

                        r = re.compile('<span class="gray">([^<]+)</span>',
                                       re.U | re.S).search(block)
                        if r:
                            original = r.group(1).strip()
                            if not original:
                                original = None

                        r = re.compile('<span class="year">([0-9]{4})</span>',
                                       re.U | re.S).search(block)
                        if r:
                            year = int(r.group(1))

                        if block.find('no-poster.gif') == -1:
                            poster = 'http://st.kinopoisk.ru/images/actor/' + id + '.jpg'

                        res.append({
                            'id': int(id),
                            'name': name,
                            'originalname': original,
                            'year': year,
                            'poster': poster
                        })

        return {'pages': (1, 0, 1, 0), 'data': res}

    def work(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/name/' + str(id) +
                                   '/',
                                   headers=self.headers)
        if response.error:
            return None

        res = {}

        r = re.compile('id="sort_block">(.+?)<style>', re.U | re.S).search(
            response.body.decode('windows-1251'))
        if r:
            for block in r.group(1).split(
                    u'<table cellspacing="0" cellpadding="0" border="0" width="100%">'
            ):
                work = None

                for w in ('actor', 'director', 'writer', 'producer',
                          'producer_ussr', 'composer', 'operator', 'editor',
                          'design', 'voice', 'voice_director'):
                    if block.find(u'id="' + w + u'"') != -1:
                        work = 'producer' if w == 'producer_ussr' else w
                        break

                if work:

                    movies = []

                    for id, name in re.compile(
                            '<span class="name"><a href="/film/([0-9]+)/" >([^<]+?)</a>',
                            re.U).findall(block):
                        for tag in (u'(мини-сериал)', u'(сериал)'):
                            if name.find(tag) != -1:
                                break
                        else:
                            movies.append(int(id))

                    if movies:
                        res.setdefault(work, []).extend(movies)

        return res

    def review(self, id, query):
        query_s = 'all' if query == 'stat' else query
        data = self.cache.get('review:' + str(id) + ':' + query_s,
                              self._review, id, query_s)
        if not data:
            return data
        return data[query]

    def countries(self):
        return COUNTRIES

    def country(self, id, default=None):
        country = [x[1] for x in COUNTRIES if x[0] == id]
        return country[0] if country else default

    # PRIVATE

    def _search_movie(self, name, year=None):
        url = 'http://www.kinopoisk.ru/s/type/film/list/1/find/' + urllib.quote_plus(
            name.encode('windows-1251'))  # + '/order/relevant'
        if year:
            url += '/m_act%5Byear%5D/' + str(year)
        url += '/m_act%5Btype%5D/film/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = []
        r = re.compile('<div class="navigator">(.+?)<div class="navigator">',
                       re.U | re.S).search(
                           response.body.decode('windows-1251'))
        if r:
            for id in re.compile(
                    '<p class="name"><a href="/level/1/film/([0-9]+)',
                    re.U | re.S).findall(r.group(1)):
                res.append(int(id))

        return {'pages': (1, 0, 1, 0), 'data': res}

    def _scraper(self, name, year):
        timeout = True

        # if the movie is recent, cache it only briefly (the site may still be updated)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week

        ids = self._search_movie(name, year)

        if ids is None:
            return False, None

        elif not ids['data']:
            # cache the empty result for three days
            return 259200, None

        else:
            return timeout, ids['data'][0]

    def _review(self, id, query):
        url = 'http://www.kinopoisk.ru/film/' + str(id) + '/ord/rating/'
        if query in ('good', 'bad', 'neutral'):
            url += 'status/' + query + '/'
        url += 'perpage/200/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'stat': {
                'all': 0,
                'good': 0,
                'bad': 0,
                'neutral': 0
            },
            query: []
        }

        r = re.compile('<ul class="resp_type">(.+?)</ul>',
                       re.U | re.S).search(html)
        if r:
            ul = r.group(1)

            for q, t in (('pos', 'good'), ('neg', 'bad'), ('neut', 'neutral')):
                r = re.compile(
                    '<li class="' + q +
                    '"><a href="[^>]+>[^<]+</a><b>([0-9]+)</b></li>',
                    re.U).search(ul)
                if r:
                    res['stat'][t] = int(r.group(1))

            res['stat']['all'] = res['stat']['good'] + res['stat'][
                'bad'] + res['stat']['neutral']

        r = re.compile('<div class="navigator">(.+?)<div class="navigator">',
                       re.U | re.S).search(html)
        if r:

            for block in r.group(1).split('itemprop="reviews"'):

                review = {
                    'nick': None,
                    'count': None,
                    'title': None,
                    'review': None,
                    'time': None
                }

                r = re.compile('itemprop="reviewBody">(.+?)</div>',
                               re.U | re.S).search(block)
                if r:

                    text = r.group(1)
                    for tag1, tag2 in ((u'<=end=>', u'\n'), (u'<b>', u'[B]'),
                                       (u'</b>', u'[/B]'), (u'<i>', u'[I]'),
                                       (u'</i>', u'[/I]'), (u'<u>', u'[U]'),
                                       (u'</u>', u'[/U]')):
                        text = text.replace(tag1, tag2)

                    r = self.html.text(text)
                    if r:
                        review['review'] = r

                user = None
                r = re.compile(
                    '<p class="profile_name"><s></s><a href="[^>]+>([^<]+)</a></p>'
                ).search(block)
                if r:
                    user = self.html.string(r.group(1))
                else:
                    r = re.compile('<p class="profile_name"><s></s>([^<]+)</p>'
                                   ).search(block)
                    if r:
                        user = self.html.string(r.group(1))
                if user:
                    review['nick'] = user

                r = re.compile('<p class="sub_title"[^>]+>([^<]+)</p>').search(
                    block)
                if r:
                    title = self.html.string(r.group(1))
                    if title:
                        review['title'] = title

                r = re.compile('<span class="date">([^<]+)</span>',
                               re.U | re.S).search(block)
                if r:
                    review['time'] = r.group(1).replace(u' |', u',')

                r = re.compile(u'<a href="[^>]+>рецензии \(([0-9]+)\)</a>',
                               re.U | re.S).search(block)
                if r:
                    review['count'] = int(r.group(1))

                if review['nick'] and review['review']:
                    res[query].append(review)

        return 3600, res  # one hour

    def _movie(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/',
                                   headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'icon': None,
            'thumbnail': None,
            'info': {
                'count': int(id)
            },
            'properties': {
                'fanart_image': None,
            },
        }

        # title, original title, tagline, content rating, year, top250
        # runtime - movie duration (kept in its own field, otherwise the file size is not visible)
        for tag, reg, t in (
            ('title', '<title>(.+?)</title>',
             'str'), ('originaltitle',
                      'itemprop="alternativeHeadline">([^<]*)</span>', 'str'),
            ('tagline',
             '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>', 'str'),
            ('mpaa', 'itemprop="contentRating"\s+content="MPAA\s+([^"]+)"',
             'str'),
            ('runtime',
             '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>',
             'str'), ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"',
                      'int'),
            ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', 'int')):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = value
                    if t == 'int':
                        res['info'][tag] = int(res['info'][tag])
                    else:
                        res['info'][tag] = self.html.string(res['info'][tag])

        # directors, writers, genres
        for tag, reg in (('director', u'<td itemprop="director">(.+?)</td>'), (
                'writer',
                u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
                         ('genre', u'<td itemprop="genre">(.+?)</td>')):
            r = re.compile(reg, re.U | re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>',
                                    re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)

        # actors
        r = re.compile(u'<h4>В главных ролях:</h4><ul>(.+?)</ul>',
                       re.U | re.S).search(html)
        if r:
            actors = []
            for r in re.compile(
                    '<li itemprop="actors"><a [^>]+>([^<]+)</a></li>',
                    re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]
                #res['info']['castandrole'] = actors[:]

        # movie description
        r = re.compile(
            '<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>',
            re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot

        # IMDB
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>',
                       re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()

        # # premiere
        # r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U|re.S).search(html)
        # if r:
        #     r = re.compile(u'data\-ical\-date="([^"]+)"', re.U|re.S).search(r.group(1))
        #     if r:
        #         data = r.group(1).split(' ')
        #         if len(data) == 3:
        #             i = 0
        #             for mon in (u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря'):
        #                 i += 1
        #                 if mon == data[1]:
        #                     mon = str(i)
        #                     if len(mon) == 1:
        #                         mon = '0' + mon
        #                     day = data[0]
        #                     if len(day) == 1:
        #                         day = '0' + day
        #                     res['info']['premiered'] = '-'.join([data[2], mon, day])
        #                     break

        # poster
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)',
                       re.U | re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                if poster.startswith("/"):
                    poster = "http://www.kinopoisk.ru%s" % poster
                res['icon'] = poster
                res['thumbnail'] = poster

        menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)</ul>',
                          re.U | re.S).search(html)
        if menu:
            menu = menu.group(1)

            # fanart
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/wall/',
                                           headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/w_size/([0-9]+)/">',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1),
                                    (id2, size2): cmp(int(size1), int(size2)))

                        # try to pick the best match
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/',
                            headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(
                                    1).strip()

            # if there is no fanart (wallpapers), try to grab stills instead
            if not res['properties']['fanart_image'] and menu.find(
                    '/film/' + id + '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/stills/',
                                           headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1, t1), (
                            id2, size2, t2): cmp(int(size1), int(size2)))

                        # try to pick the best match
                        fanart_best = [
                            x for x in fanart
                            if int(x[1]) <= 1280 and int(x[1]) > int(x[2])
                        ]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/',
                            headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(
                                    1).strip()

            # # studios
            # if menu.find('/film/' + id + '/studio/') != -1:
            #     response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers)
            #     if not response.error:
            #         html = response.body.decode('windows-1251')
            #         r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U|re.S).search(html)
            #         if r:
            #             studio = []
            #             for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)):
            #                 r = self.html.string(r)
            #                 if r:
            #                     studio.append(r)
            #             if studio:
            #                 res['info']['studio'] = u', '.join(studio)

            # trailers

            # trailers1 = [] # Russian trailers
            # trailers2 = [] # other Russian videos
            # trailers3 = [] # trailers
            # trailers4 = [] # other videos

            # if menu.find('/film/' + id + '/video/') != -1:
            #     response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/video/', headers=self.headers)
            #     if not response.error:
            #         html = response.body.decode('windows-1251')

            #         for row in re.compile(u'<!-- ролик -->(.+?)<!-- /ролик -->', re.U|re.S).findall(html):

            #             # skip irrelevant blocks
            #             if row.find(u'>СМОТРЕТЬ</a>') != -1:

            #                 # is it a Russian clip?
            #                 if row.find('class="flag flag2"') == -1:
            #                     is_ru = False
            #                 else:
            #                     is_ru = True

            #                 # get the trailer name
            #                 r = re.compile('<a href="/film/' + id + '/video/[0-9]+/[^>]+ class="all">(.+?)</a>', re.U).search(row)
            #                 if r:
            #                     name = self.html.string(r.group(1))
            #                     if name:

            #                         trailer = {
            #                             'name': name,
            #                             'time': None,
            #                             'trailer': False,
            #                             'ru': is_ru,
            #                             'video': []
            #                         }

            #                         # trailer or teaser?
            #                         for token in (u'Трейлер', u'трейлер', u'Тизер', u'тизер'):
            #                             if name.find(token) != -1:
            #                                 trailer['trailer'] = True
            #                                 break

            #                         # get the trailer duration
            #                         r = re.compile(u'clock.gif"[^>]+></td>\s*<td style="color\: #777">[^0-9]*([0-9\:]+)</td>', re.U|re.S).search(row)
            #                         if r:
            #                             trailer['time'] = r.group(1).strip()

            #                         # split the clips by quality
            #                         for r in re.compile('trailer/([1-3])a.gif"(.+?)link=([^"]+)" class="continue">.+?<td style="color\:#777">([^<]+)</td>\s*</tr>', re.U|re.S).findall(row):
            #                             quality = int(r[0])
            #                             if r[1].find('icon-hd') != -1:
            #                                 quality += 3

            #                             trailer['video'].append((quality, r[2].strip(), r[3]))

            #                         if trailer['video']:
            #                             if trailer['ru']:
            #                                 if trailer['trailer']:
            #                                     trailers1.append(trailer)
            #                                 else:
            #                                     trailers2.append(trailer)
            #                             else:
            #                                 if trailer['trailer']:
            #                                     trailers3.append(trailer)
            #                                 else:
            #                                     trailers4.append(trailer)

            # # merge the trailer lists
            # res['trailers'].extend(trailers1)
            # res['trailers'].extend(trailers2)
            # res['trailers'].extend(trailers3)
            # res['trailers'].extend(trailers4)

        timeout = True
        # if the movie is recent, cache it only briefly (the site may still be updated)
        if 'year' not in res['info'] or int(
                res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week

        return timeout, res
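
best() above folds pagination into a (pages, previous, current, next) tuple, storing 0 when there is no previous or next page. A worked sketch of that arithmetic as a standalone function (Python 3 integer division):

def pages_tuple(first_item, total, limit):
    # first_item: 1-based index of the first item on the page, as scraped
    # from the ">51&mdash;100 ... 120" counter; total: item count; limit: page size
    page = (first_item - 1) // limit + 1
    pages = total // limit
    if limit * pages != total:
        pages += 1
    return (pages, 0 if page == 1 else page - 1, page, 0 if page == pages else page + 1)

print(pages_tuple(51, 120, 50))  # -> (3, 1, 2, 3): three pages, currently on page 2
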
Exemplo n.º 30
0
class TvDb:
    """
    
    API:
        scraper  - scraper entry point
        search   - series search
        movie    - movie profile
        
    """
    
    def __init__(self):
        self.api_key = '33DBB309BB2B0ADB'
        
        self.cache = Cache('tvdb.db', 1.0)
        
        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.thetvdb.com/'
        }
        
        
    # API
    
    def scraper(self, search, year=None, season=None):
        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8'))
        except:
            return None
        else:
            
            if year:
                tag += ':' + str(year)

            
            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            if season: return self.get_banners(id)
            
            return self.movie(id)

    def get_banners(self, id):
        import xml.etree.ElementTree as ET
        dirname = tempfile.mkdtemp()
        response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + str(id) + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip'))
        if response.error:
            self._movie_clear(dirname)
            return False, None

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            filezip.extractall(dirname)
            filezip.close()
            movie = open(os.path.join(dirname, 'banners.xml'), 'rb').read().decode('utf8')
        except:
            self._movie_clear(dirname)
            return False, None

        self._movie_clear(dirname)

        dom = ET.fromstring(movie)
        if not len(dom):
            return

        def dom2dict(node):
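            # recursively convert an ElementTree node into nested dicts (leaf tags map to their text)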
            ret = {}
            for child in node:
                if len(child):
                    ret.setdefault(child.tag.lower(), []).append(dom2dict(child))
                else:
                    ret[child.tag.lower()] = child.text
            return ret

        def update_image_urls(meta):
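            # rewrite every known image-path field in the parsed tree into an absolute thetvdb.com URL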
            if isinstance(meta, dict):
                for k, v in meta.items():
                    if isinstance(v, list):
                        map(update_image_urls, v)
                    elif isinstance(v, dict):
                        update_image_urls(v)
                    elif k in ["banner", "fanart", "poster", "filename", "bannerpath", "vignettepath", "thumbnailpath"] and isinstance(v, basestring):
                        meta[k] = image_url(v)
            return meta

        def image_url(fragment):
            return "%s/banners/%s" % ("http://www.thetvdb.com", fragment)

        return update_image_urls(dom2dict(dom))["banner"]

    def search(self, name):
        return self._search(name)
    
    
    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)
    
    
    def _movie(self, id):
        dirname = tempfile.mkdtemp()
        response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip'))
        if response.error:
            self._movie_clear(dirname)
            return False, None
        
        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            filezip.extractall(dirname)
            filezip.close()
            movie = open(os.path.join(dirname, 'ru.xml'), 'rb').read().decode('utf8')
        except:
            self._movie_clear(dirname)
            return False, None
        
        self._movie_clear(dirname)
        
        body = re.compile(r'<Series>(.+?)</Series>', re.U|re.S).search(movie)
        if not body:
            return False, None
        
        body = body.group(1)
        
        res = {
            'icon' : None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count' : int(id)
            }
        }
        
        # directors and writers
        for tag in ('Director', 'Writer'):
            people = {}
            people_list = []
            [people_list.extend(x.split('|')) for x in re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U|re.S).findall(movie)]
            [people.update({x: 1}) for x in [x.strip() for x in people_list] if x]
            if people:
                res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x])
        
        for tag, retag, typeof, targettype in (
                    ('plot', 'Overview', None, None),
                    ('mpaa', 'ContentRating', None, None),
                    ('premiered', 'FirstAired', None, None),
                    ('studio', 'Network', None, None),
                    ('title', 'SeriesName', None, None),
                    ('runtime', 'Runtime', None, None),
                    ('votes', 'RatingCount', None, None),
                    ('rating', 'Rating', float, None),
                    ('genre', 'Genre', list, unicode),
                    ('cast', 'Actors', list, None)
                    ):
            r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U|re.S).search(body)
            if r:
                r = r.group(1).strip()
                if typeof == float:
                    res['info'][tag] = float(r)
                elif typeof == list:
                    if targettype == unicode:
                        res['info'][tag] = u', '.join([x for x in [x.strip() for x in r.split(u'|')] if x])
                    else:
                        res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x]
                else:
                    res['info'][tag] = r
        
        # year
        if 'premiered' in res['info']:
            res['info']['year'] = int(res['info']['premiered'].split('-')[0])
        
        # poster
        r = re.compile(r'<poster>([^<]+)</poster>', re.U|re.S).search(body)
        if r:
            res['icon'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
            res['thumbnail'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        # fanart
        r = re.compile(r'<fanart>([^<]+)</fanart>', re.U|re.S).search(body)
        if r:
            res['properties']['fanart_image'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        timeout = True
        # if the movie is recent, cache it only briefly (the site may still be updated)
        if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week
        
        return timeout, res
            
    
    def _movie_clear(self, dirname):
        for filename in os.listdir(dirname):
            try:
                os.unlink(os.path.join(dirname, filename))
            except:
                raise
        try:
            os.rmdir(dirname)
        except:
            raise
        
    
    def _search(self, search):
        i = -1
        for name in search:
            i += 1
            response = self.http.fetch('http://www.thetvdb.com/api/GetSeries.php?language=ru&seriesname=' + urllib.quote_plus(name.encode('utf-8','ignore')), headers=self.headers)
            if response.error:
                return None
        
            res = []
            rows = re.compile('<Series>(.+?)</Series>', re.U|re.S).findall(response.body.decode('utf8'))
            if rows:
                recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U|re.S)
            
                for row in [x for x in rows if x.find(u'<language>ru</language>') != -1]:
                    r = recmd.search(row)
                    if r:
                        res.append(int(r.group(1)))
                # in some cases the series can only be found by its original title,
                # even though a Russian description exists
                if not res:
                    for row in [x for x in rows if x.find(u'<language>en</language>') != -1]:
                        r = recmd.search(row)
                        if r:
                            res.append(int(r.group(1)))

            if res:
                break
                
        return {'pages': (1, 0, 1, 0), 'data': res}
    
    
    def _scraper(self, name, year):
        timeout = True
        
        # if the movie is recent, cache it only briefly (the site may still be updated)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week
        
        ids = self._search(name)
        
        if ids is None:
            return False, None
        
        elif not ids['data']:
            # cache the empty result for three days
            return 259200, None
        
        else:
            return timeout, ids['data'][0]
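
A self-contained sketch of the dom2dict helper from get_banners() above, run against a tiny hand-written banners.xml fragment (the XML content is invented for illustration):

import xml.etree.ElementTree as ET

XML = """<Banners>
  <Banner>
    <BannerPath>posters/83462-1.jpg</BannerPath>
    <BannerType>poster</BannerType>
  </Banner>
</Banners>"""

def dom2dict(node):
    ret = {}
    for child in node:
        if len(child):
            ret.setdefault(child.tag.lower(), []).append(dom2dict(child))
        else:
            ret[child.tag.lower()] = child.text
    return ret

print(dom2dict(ET.fromstring(XML))['banner'])
# -> [{'bannerpath': 'posters/83462-1.jpg', 'bannertype': 'poster'}]
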
Exemplo n.º 31
0
class KinoPoisk:
    """
    
    API:
        scraper  - scraper entry point
        movie    - movie profile
        search   - movie search
        best     - top-rated movie search
        person   - person search
        work     - a person's body of work
        
    """

    def __init__(self, language='ru'):
        dbname = 'kinopoisk.%s.db' % language
        self.cache = Cache(dbname, 1.0)
        self.html = Clear()

        self.timeout = 60.0

        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }

    # API

    def scraper(self, search, year=None):

        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8'))
        except:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id)

    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)

    def search(self, search, year):
        return self._search_movie(search, year)

    def countries(self):
        return COUNTRIES

    def country(self, id, default=None):
        country = [x[1] for x in COUNTRIES if x[0] == id]
        return country[0] if country else default

    def _search_movie(self, search, year=None):
        parser = kinopoisk.pageparser.PageParser(kinopoisk.LOGGER, isDebug=True)
        originalname = search[0]
        if len(search) > 1:
            name = search[1]
        else:
            name = None
        results = parser.fetchAndParseSearchResults(originalname, year, name)
        if results and results[0][3] > 70:
            return results[0][0]

    def _scraper(self, search, year):
        timeout = True

        # if the movie is recent, cache it only briefly (the site may still be updated)
        if year and year > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        movie_id = self._search_movie(search, year)

        if movie_id is None:
            # cache the empty result for 4 weeks
            return 7 * 24 * 60 * 60 * 4, None

        else:
            return timeout, movie_id

    def _movie(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/', headers=self.headers,
                                   timeout=self.timeout)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'icon': None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count': int(id)
            }
        }

        # title, original title, tagline, content rating, year, top250
        # runtime - movie duration (kept in its own field, otherwise the file size is not visible)
        for tag, reg, cb in (
                ('title', '<title>(.+?)</title>', self.html.string),
                ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', self.html.string),
                ('tagline', '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>', self.html.string),
                ('mpaa', 'images/mpaa/([^\.]+).gif', self.html.string),
                ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>',
                 self.html.string),
                ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', int),
                ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', int)

        ):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = cb(value)


        # directors, writers, genres
        for tag, reg in (
                ('director', u'<td itemprop="director">(.+?)</td>'),
                ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
                ('genre', u'<span itemprop="genre">(.+?)</span>')
        ):
            r = re.compile(reg, re.U | re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>', re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)

        # actors
        r = re.compile(u'<h4>В главных ролях:</h4>(.+?)</ul>', re.U | re.S).search(html)
        if r:
            actors = []
            for r in re.compile('<li itemprop="actors"><a [^>]+>([^<]+)</a></li>', re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]
                # res['info']['castandrole'] = actors[:]

        # movie description
        r = re.compile('<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>',
                       re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot

        # IMDB
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()


        # premiere
        r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U | re.S).search(html)
        if r:
            r = re.compile(u'data\-ical\-date="([^"]+)"', re.U | re.S).search(r.group(1))
            if r:
                data = r.group(1).split(' ')
                if len(data) == 3:
                    i = 0
                    for mon in (
                            u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа',
                            u'сентября',
                            u'октября', u'ноября', u'декабря'):
                        i += 1
                        if mon == data[1]:
                            mon = str(i)
                            if len(mon) == 1:
                                mon = '0' + mon
                            day = data[0]
                            if len(day) == 1:
                                day = '0' + day
                            res['info']['premiered'] = '-'.join([data[2], mon, day])
                            break


        # poster
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)', re.U | re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                res['thumbnail'] = res['icon'] = 'http://kinopoisk.ru' + poster

        menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)<!\-\- /menu \-\->', re.U | re.S).search(html)
        if menu:
            menu = menu.group(1)

            # fanart
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/wall/', headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile('<a href="/picture/([0-9]+)/w_size/([0-9]+)/">', re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1), (id2, size2): cmp(int(size1), int(size2)))

                        # try to pick the best-fitting image
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/',
                            headers=self.headers, timeout=self.timeout)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(1).strip()


            # if there is no fanart (wallpapers), try the film stills instead
            if not res['properties']['fanart_image'] and menu.find('/film/' + id + '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/stills/', headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1, t1), (id2, size2, t2): cmp(int(size1), int(size2)))

                        # try to pick the best-fitting image
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280 and int(x[1]) > int(x[2])]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/',
                                                   headers=self.headers, timeout=self.timeout)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(1).strip()


            # studios
            if menu.find('/film/' + id + '/studio/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U | re.S).search(html)
                    if r:
                        studio = []
                        for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>',
                                            re.U).findall(r.group(1)):
                            r = self.html.string(r)
                            if r:
                                studio.append(r)
                        if studio:
                            res['info']['studio'] = u', '.join(studio)

        timeout = True
        # a recent movie is cached only briefly (the site may still get updates)
        if 'year' not in res['info'] or not res['properties']['fanart_image'] \
                or int(res['info']['year']) > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        return timeout, res
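
The (timeout, res) pair returned above follows the callback contract of the Cache class shown in Exemplo n.º 33: returning True as the first element caches the entry forever, while returning a number caches it for that many seconds. A minimal sketch of that contract, with a hypothetical fetch_movie callback:

def fetch_movie(movie_id):
    data = {'id': movie_id, 'title': 'Example'}  # pretend network result
    is_fresh = True                              # assumed freshness check
    if is_fresh:
        return 7 * 24 * 60 * 60, data            # re-fetch after a week
    return True, data                            # cache permanently

cache = Cache('kinopoisk.db', 1.0)
movie = cache.get('movie:123', fetch_movie, '123')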
Exemplo n.º 32
0
    def attackPOST(self, form):
        """This method performs the cross site scripting attack (XSS attack) with method POST"""
        page = form.url
        referer = form.referer
        headers = {}
        if referer:
            headers["referer"] = referer

        if page not in self.PHP_SELF:
            evil_req = None
            if page.endswith("/"):
                evil_req = HTTP.HTTPResource(page + self.php_self_payload)
            elif page.endswith(".php"):
                evil_req = HTTP.HTTPResource(page + "/" +
                                             self.php_self_payload)
            if evil_req:
                if self.verbose == 2:
                    print(u"+ {0}".format(evil_req.url))
                data, http_code = self.HTTP.send(
                    evil_req, headers=headers).getPageCode()
                if self._validXSSContentType(
                        evil_req) and self.php_self_check in data:
                    self.logR(Vulnerability.MSG_PATH_INJECT, self.MSG_VULN,
                              page)
                    self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url)

                    self.logVuln(
                        category=Vulnerability.XSS,
                        level=Vulnerability.HIGH_LEVEL,
                        request=evil_req,
                        parameter="PHP_SELF",
                        info=
                        _("XSS vulnerability found via injection in the resource path"
                          ))
            self.PHP_SELF.append(page)

        # copies
        get_params = form.get_params
        post_params = form.post_params
        file_params = form.file_params

        for params_list in [get_params, post_params, file_params]:
            for i in xrange(len(params_list)):
                param_name = self.HTTP.quote(params_list[i][0])
                saved_value = params_list[i][1]
                if params_list is file_params:
                    params_list[i][1] = ["_XSS__", params_list[i][1][1]]
                else:
                    params_list[i][1] = "__XSS__"
                # We keep an attack pattern to be sure a given form won't be attacked on the same field several times
                attack_pattern = HTTP.HTTPResource(form.path,
                                                   method=form.method,
                                                   get_params=get_params,
                                                   post_params=post_params,
                                                   file_params=file_params)
                if attack_pattern not in self.attackedPOST:
                    self.attackedPOST.append(attack_pattern)
                    code = self.random_string()
                    if params_list is file_params:
                        params_list[i][1][0] = code
                    else:
                        params_list[i][1] = code
                    # only the last payload used (working or not) is memorized, but the random code is always the right one
                    test_payload = HTTP.HTTPResource(form.path,
                                                     method=form.method,
                                                     get_params=get_params,
                                                     post_params=post_params,
                                                     file_params=file_params,
                                                     referer=referer)

                    self.POST_XSS[code] = (test_payload, param_name)
                    try:
                        resp = self.HTTP.send(test_payload)
                        data = resp.getPage()
                    except requests.exceptions.Timeout, timeout:
                        data = ""
                        resp = timeout
                    # quick search for the random code to check for injection
                    if code in data:
                        # found, now study where the payload is injected and how to exploit it
                        payloads = self.generate_payloads(data, code)
                        for payload in payloads:
                            if params_list is file_params:
                                params_list[i][1][0] = payload
                            else:
                                params_list[i][1] = payload

                            evil_req = HTTP.HTTPResource(
                                form.path,
                                method=form.method,
                                get_params=get_params,
                                post_params=post_params,
                                file_params=file_params,
                                referer=referer)

                            if self.verbose == 2:
                                print(u"+ {0}".format(evil_req))
                            try:
                                resp = self.HTTP.send(evil_req)
                                dat = resp.getPage()
                            except requests.exceptions.Timeout, timeout:
                                dat = ""
                                resp = timeout

                            if self._validXSSContentType(
                                    evil_req
                            ) and dat is not None and len(dat) > 1:
                                if payload.lower() in dat.lower():
                                    self.SUCCESSFUL_XSS[code] = payload
                                    self.logVuln(
                                        category=Vulnerability.XSS,
                                        level=Vulnerability.HIGH_LEVEL,
                                        request=evil_req,
                                        parameter=param_name,
                                        info=_(
                                            "XSS vulnerability found via injection"
                                            " in the parameter {0}").format(
                                                param_name))

                                    self.logR(Vulnerability.MSG_PARAM_INJECT,
                                              self.MSG_VULN, evil_req.url,
                                              param_name)

                                    self.logR(Vulnerability.MSG_EVIL_REQUEST)
                                    self.logC(evil_req.http_repr)
                                    print('')
                                    # Stop injecting payloads and move to the next parameter
                                    break

                # restore the saved parameter in the list
                params_list[i][1] = saved_value
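
The method above works in two phases: it first injects a harmless random marker and only generates real payloads for fields that reflect it. A standalone sketch of the probe step (random_marker and is_reflected are illustrative helpers, not part of the scanner):

import random
import string

def random_marker(length=10):
    # a unique lowercase token: easy to spot, unlikely to trip filters
    return ''.join(random.choice(string.ascii_lowercase)
                   for _ in range(length))

def is_reflected(body, marker):
    # phase 1: the field echoes input if the marker comes back verbatim;
    # phase 2 then replays real payloads and compares case-insensitively
    return marker in body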
Exemplo n.º 33
0
class Cache:
    def __init__(self, name, version, expire=0, size=0, step=100):
        self.name = name
        self.version = version
        self._connect()
        if expire:
            self.expire(expire)
        if size:
            self.size(size, step)

    def get(self, token, callback, *param):
        cur = self.db.cursor()
        cur.execute('select expire,data from cache where id=? limit 1',
                    (token, ))
        row = cur.fetchone()
        cur.close()

        if row:
            if row[0] and row[0] < int(time.time()):
                pass
            else:
                try:
                    obj = pickle.loads(row[1])
                except:
                    pass
                else:
                    return obj

        response = callback(*param)

        if response[0]:
            obj = sqlite.Binary(pickle.dumps(response[1]))
            curtime = int(time.time())
            cur = self.db.cursor()
            if isinstance(response[0], bool):
                cur.execute(
                    'replace into cache(id,addtime,expire,data) values(?,?,?,?)',
                    (token, curtime, None, obj))
            else:
                cur.execute(
                    'replace into cache(id,addtime,expire,data) values(?,?,?,?)',
                    (token, curtime, curtime + response[0], obj))
            self.db.commit()
            cur.close()

        return response[1]

    def expire(self, expire):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache where addtime<?',
                    (int(time.time()) - expire, ))
        self.db.commit()
        cur.close()

    def size(self, size, step=100):
        # with rtrCache_lock:
        while True:
            if os.path.getsize(self.filename) < size:
                break
            cur = self.db.cursor()
            cur.execute('select id from cache order by addtime asc limit ?',
                        (step, ))
            rows = cur.fetchall()
            if not rows:
                cur.close()
                break
            cur.execute(
                'delete from cache where id in (' + ','.join(len(rows) * '?') +
                ')', [x[0] for x in rows])
            self.db.commit()
            cur.close()

    def flush(self):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache')
        self.db.commit()
        cur.close()

    def _connect(self):
        with rtrCache_lock:
            dirname = xbmc.translatePath('special://temp')
            for subdir in ('xbmcup', 'plugin.video.torrenter'):
                dirname = os.path.join(dirname, subdir)
                if not xbmcvfs.exists(dirname):
                    xbmcvfs.mkdir(dirname)

            self.filename = os.path.join(dirname, self.name)

            first = False
            if not xbmcvfs.exists(self.filename):
                first = True

            self.db = sqlite.connect(self.filename, check_same_thread=False)
            if not first:
                cur = self.db.cursor()
                try:
                    cur.execute('select version from db_ver')
                    row = cur.fetchone()
                    if not row or float(row[0]) != self.version:
                        cur.execute('drop table cache')
                        cur.execute('drop table if exists db_ver')
                        first = True
                except:
                    cur.execute('drop table cache')
                    first = True
                self.db.commit()
                cur.close()

            if first and not self.first_time():
                cur = self.db.cursor()
                cur.execute('pragma auto_vacuum=1')
                cur.execute(
                    'create table cache(id varchar(255) unique, addtime integer, expire integer, data blob)'
                )
                cur.execute('create index time on cache(addtime asc)')
                cur.execute('create table db_ver(version real)')
                cur.execute('insert into db_ver(version) values(?)',
                            (self.version, ))
                self.db.commit()
                cur.close()

    def first_time(self):
        scrapers = {
            'tvdb': 'TheTVDB.com',
            'tmdb': 'TheMovieDB.org',
            'kinopoisk': 'KinoPoisk.ru'
        }
        ok = xbmcgui.Dialog().yesno(
            Localization.localize('Content Lists'),
            Localization.localize('Do you want to preload full metadata?') +
            ' (%s)' %
            (scrapers[os.path.basename(self.filename).split('.')[0]]),
            Localization.localize('It is highly recommended!'))
        if ok:
            return self.download()
        else:
            return False

    def download(self):
        dirname = os.path.dirname(self.filename)
        zipname = os.path.basename(self.filename).replace('.db', '') + '.zip'
        url = 'http://www.tat-store.ru/torrenter/' + zipname
        self.http = HTTP()
        response = self.http.fetch(url,
                                   download=os.path.join(dirname, zipname),
                                   progress=True)
        if response.error:
            return False

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, zipname), 'r')
            filezip.extractall(dirname)
            filezip.close()
        except:
            return False

        return True
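
A hypothetical usage of the Cache class above (load_remote and the token are illustrative): the extra constructor arguments prune entries older than expire seconds and keep the database file under size bytes.

def load_remote(token):
    data = {'token': token}    # pretend this came from the network
    return 24 * 60 * 60, data  # keep it for one day

cache = Cache('tvdb.db', 1.0, expire=30 * 24 * 60 * 60, size=10 * 1024 * 1024)
obj = cache.get('series:42', load_remote, 'series:42')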
Exemplo n.º 34
0
    def attackGET(self, http_res):
        """This method performs the cross site scripting attack (XSS attack) with method GET"""

        # copies
        page = http_res.path
        params_list = http_res.get_params
        resp_headers = http_res.headers
        referer = http_res.referer
        headers = {}
        if referer:
            headers["referer"] = referer

        # Some PHP scripts don't sanitize data coming from $_SERVER['PHP_SELF']
        if page not in self.PHP_SELF:
            evil_req = None
            if page.endswith("/"):
                evil_req = HTTP.HTTPResource(page + self.php_self_payload)
            elif page.endswith(".php"):
                evil_req = HTTP.HTTPResource(page + "/" +
                                             self.php_self_payload)
            if evil_req is not None:
                if self.verbose == 2:
                    print(u"+ {0}".format(evil_req.url))
                data, http_code = self.HTTP.send(
                    evil_req, headers=headers).getPageCode()
                if self._validXSSContentType(
                        evil_req) and self.php_self_check in data:
                    self.logR(Vulnerability.MSG_PATH_INJECT, self.MSG_VULN,
                              page)
                    self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url)

                    self.logVuln(
                        category=Vulnerability.XSS,
                        level=Vulnerability.HIGH_LEVEL,
                        request=evil_req,
                        parameter="PHP_SELF",
                        info=
                        _("XSS vulnerability found via injection in the resource path"
                          ))
            self.PHP_SELF.append(page)

        # page is the url of the script
        # params_list is a list of [key, value] lists
        if not params_list:
            # Do not attack application-type files
            if not "content-type" in resp_headers:
                # Sometimes there's no content-type... so we rely on the document extension
                if (page.split(".")[-1]
                        not in self.allowed) and page[-1] != "/":
                    return
            elif not "text" in resp_headers["content-type"]:
                return

            url = page + "?__XSS__"
            if url not in self.attackedGET:
                self.attackedGET.append(url)
                code = self.random_string()
                test_url = HTTP.HTTPResource(page + "?" + code)
                self.GET_XSS[code] = (test_url, "QUERY_STRING")
                try:
                    resp = self.HTTP.send(test_url, headers=headers)
                    data = resp.getPage()
                except requests.exceptions.Timeout:
                    data = ""
                    resp = None
                if code in data:
                    payloads = self.generate_payloads(data, code)
                    for payload in payloads:
                        evil_req = HTTP.HTTPResource(page + "?" +
                                                     self.HTTP.quote(payload))
                        if self.verbose == 2:
                            print(u"+ {0}".format(evil_req))
                        try:
                            resp = self.HTTP.send(evil_req, headers=headers)
                            dat = resp.getPage()
                        except requests.exceptions.Timeout, timeout:
                            dat = ""
                            resp = timeout
                        param_name = "QUERY_STRING"

                        if self._validXSSContentType(
                                evil_req) and dat is not None and len(dat) > 1:
                            if payload.lower() in dat.lower():
                                self.SUCCESSFUL_XSS[code] = payload
                                self.logVuln(
                                    category=Vulnerability.XSS,
                                    level=Vulnerability.HIGH_LEVEL,
                                    request=evil_req,
                                    parameter=param_name,
                                    info=
                                    _("XSS vulnerability found via injection in the query string"
                                      ))

                                self.logR(Vulnerability.MSG_QS_INJECT,
                                          self.MSG_VULN, page)
                                self.logR(Vulnerability.MSG_EVIL_URL,
                                          evil_req.url)
                                # No more payload injection
                                break
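
The content-type gate at the start of the method keeps the scanner away from binary documents. A condensed sketch of the same rule (the allowed extension list is an assumption; self.allowed is defined elsewhere):

def looks_attackable(page, resp_headers, allowed=('php', 'html', 'htm', 'asp')):
    # with no content-type header, fall back to the file extension;
    # otherwise only text documents are worth attacking
    if 'content-type' not in resp_headers:
        return page.endswith('/') or page.split('.')[-1] in allowed
    return 'text' in resp_headers['content-type']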
Exemplo n.º 35
0
    def attackGET(self, http_res):
        """This method performs the Blind SQL attack with method GET"""
        page = http_res.path
        params_list = http_res.get_params
        resp_headers = http_res.headers
        referer = http_res.referer
        headers = {}
        if referer:
            headers["referer"] = referer

        if not params_list:
            # Do not attack application-type files
            if not "content-type" in resp_headers:
                # Sometimes there's no content-type... so we rely on the document extension
                if (page.split(".")[-1]
                        not in self.allowed) and page[-1] != "/":
                    return
            elif not "text" in resp_headers["content-type"]:
                return

            pattern_url = page + "?__SQL__"
            if pattern_url in self.excludedGET:
                return

            if pattern_url not in self.attackedGET:
                self.attackedGET.append(pattern_url)
                err500 = 0
                for payload in self.blind_sql_payloads:
                    if "[VALUE]" in payload:
                        continue
                    payload = self.HTTP.quote(
                        payload.replace("__TIME__", self.TIME_TO_SLEEP))
                    url = page + "?" + payload
                    evil_req = HTTP.HTTPResource(url)
                    if self.verbose == 2:
                        print(u"+ {0}".format(evil_req.url))
                    try:
                        resp = self.HTTP.send(evil_req, headers=headers)
                        data, code = resp.getPageCode()
                    except requests.exceptions.Timeout:
                        self.logVuln(
                            category=Vulnerability.BLIND_SQL_INJECTION,
                            level=Vulnerability.HIGH_LEVEL,
                            request=evil_req,
                            parameter="QUERY_STRING",
                            info=_("{0} via injection in the query string"
                                   ).format(self.MSG_VULN))
                        self.logR(Vulnerability.MSG_QS_INJECT, self.MSG_VULN,
                                  page)
                        self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url)
                        break
                    else:
                        if code == "500" and err500 == 0:
                            err500 = 1
                            self.logAnom(category=Anomaly.ERROR_500,
                                         level=Anomaly.HIGH_LEVEL,
                                         request=evil_req,
                                         parameter="QUERY_STRING",
                                         info=Anomaly.MSG_QS_500)
                            self.logO(Anomaly.MSG_500, page)
                            self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)
        else:
            for i in range(len(params_list)):
                saved_value = params_list[i][1]

                param_name = self.HTTP.quote(params_list[i][0])
                params_list[i][1] = "__SQL__"
                pattern_url = page + "?" + self.HTTP.encode(params_list)

                # This field was successfully attacked with a non-blind SQL injection
                if pattern_url in self.excludedGET:
                    params_list[i][1] = saved_value
                    continue

                if pattern_url not in self.attackedGET:
                    self.attackedGET.append(pattern_url)

                    err500 = 0
                    for payload in self.blind_sql_payloads:
                        payload = payload.replace("[VALUE]", saved_value)
                        params_list[i][1] = self.HTTP.quote(
                            payload.replace("__TIME__", self.TIME_TO_SLEEP))
                        url = page + "?" + self.HTTP.encode(params_list)
                        evil_req = HTTP.HTTPResource(url)
                        if self.verbose == 2:
                            print(u"+ {0}".format(evil_req.url))
                        try:
                            resp = self.HTTP.send(evil_req, headers=headers)
                            data, code = resp.getPageCode()
                        except requests.exceptions.Timeout:
                            self.logVuln(
                                category=Vulnerability.BLIND_SQL_INJECTION,
                                level=Vulnerability.HIGH_LEVEL,
                                request=evil_req,
                                parameter=param_name,
                                info=_("{0} via injection in "
                                       "the parameter {1}").format(
                                           self.MSG_VULN, param_name))
                            self.logR(Vulnerability.MSG_PARAM_INJECT,
                                      self.MSG_VULN, page, param_name)
                            self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url)
                            # One payload worked. Now jump to the next field
                            break
                        else:
                            if code == "500" and err500 == 0:
                                err500 = 1
                                self.logAnom(category=Anomaly.ERROR_500,
                                             level=Anomaly.HIGH_LEVEL,
                                             request=evil_req,
                                             parameter=param_name,
                                             info=Anomaly.MSG_PARAM_500.format(
                                                 param_name))
                                self.logO(Anomaly.MSG_500, page)
                                self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)
                params_list[i][1] = saved_value
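
Detection here is purely time-based: a payload that makes the DBMS sleep longer than the HTTP client's timeout proves the injected SQL was executed. A self-contained sketch of that idea (URL, payload, and sleep time are illustrative):

import requests

def probe_time_based(url, payload_template, sleep_seconds=6):
    # send an injected sleep(); if this request times out while normal
    # requests do not, the sleep ran server-side: blind SQL injection
    payload = payload_template.replace('__TIME__', str(sleep_seconds))
    try:
        requests.get(url + '?' + payload, timeout=sleep_seconds - 1)
    except requests.exceptions.Timeout:
        return True
    return False

# probe_time_based('http://example.com/item.php',
#                  'id=1%20AND%20sleep(__TIME__)')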
Exemplo n.º 36
0
    def attackGET(self, http_res):
        """This method performs the file handling attack with method GET"""
        page = http_res.path
        params_list = http_res.get_params
        resp_headers = http_res.headers
        referer = http_res.referer
        headers = {}
        if referer:
            headers["referer"] = referer

        if not params_list:
            # Do not attack application-type files
            if not "content-type" in resp_headers:
                # Sometimes there's no content-type... so we rely on the document extension
                if (page.split(".")[-1]
                        not in self.allowed) and page[-1] != "/":
                    return
            elif not "text" in resp_headers["content-type"]:
                return

            timeouted = False
            warn = 0
            inc = 0
            err500 = 0

            for payload in self.payloads:
                if "[VALUE]" in payload or "[DIRVALUE]" in payload or "[FILE_NAME]" in payload:
                    continue
                err = ""
                url = page + "?" + self.HTTP.quote(payload)
                if url not in self.attackedGET:
                    if self.verbose == 2:
                        print(u"+ {0}".format(url))
                    self.attackedGET.append(url)
                    evil_req = HTTP.HTTPResource(url)
                    try:
                        data, code = self.HTTP.send(
                            evil_req, headers=headers).getPageCode()
                    except requests.exceptions.Timeout:
                        # Display a warning about timeout only once for a parameter
                        if timeouted:
                            continue
                        data = ""
                        code = "408"
                        err = ""
                        self.logAnom(category=Anomaly.RES_CONSUMPTION,
                                     level=Anomaly.MEDIUM_LEVEL,
                                     request=evil_req,
                                     info=Anomaly.MSG_QS_TIMEOUT)
                        self.logO(Anomaly.MSG_TIMEOUT, page)
                        self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)
                        timeouted = True
                    else:
                        err, inc, warn = self.__findPatternInResponse(
                            data, warn)

                    if err != "":
                        self.logVuln(
                            category=Vulnerability.FILE_HANDLING,
                            level=Vulnerability.HIGH_LEVEL,
                            request=evil_req,
                            info=_("{0} via injection in the query string"
                                   ).format(err))
                        self.logR(Vulnerability.MSG_QS_INJECT, err)
                        self.logR(Vulnerability.MSG_EVIL_URL)
                        if inc:
                            break
                    else:
                        if code == "500" and err500 == 0:
                            err500 = 1
                            self.logAnom(category=Anomaly.ERROR_500,
                                         level=Anomaly.HIGH_LEVEL,
                                         request=evil_req,
                                         info=Anomaly.MSG_QS_500)
                            self.logO(Anomaly.MSG_500, evil_req.path)
                            self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)

        for i in range(len(params_list)):
            timeouted = False
            warn = 0
            inc = 0
            err500 = 0
            param_name = self.HTTP.quote(params_list[i][0])
            saved_value = params_list[i][1]

            for payload in self.payloads:
                err = ""

                payload = payload.replace('[VALUE]', saved_value)
                payload = payload.replace('[DIRVALUE]',
                                          saved_value.rsplit('/', 1)[0])
                payload = payload.replace('[FILE_NAME]', http_res.file_name)

                params_list[i][1] = self.HTTP.quote(payload)
                url = page + "?" + self.HTTP.encode(params_list)
                if url not in self.attackedGET:
                    if self.verbose == 2:
                        print(u"+ {0}".format(url))
                    self.attackedGET.append(url)
                    evil_req = HTTP.HTTPResource(url)
                    try:
                        data, code = self.HTTP.send(
                            evil_req, headers=headers).getPageCode()
                    except requests.exceptions.Timeout:
                        if timeouted:
                            continue
                        data = ""
                        code = "408"
                        err = ""
                        self.logAnom(
                            category=Anomaly.RES_CONSUMPTION,
                            level=Anomaly.MEDIUM_LEVEL,
                            request=evil_req,
                            parameter=param_name,
                            info=Anomaly.MSG_PARAM_TIMEOUT.format(param_name))
                        self.logO(Anomaly.MSG_TIMEOUT, page)
                        self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)
                        timeouted = True
                    else:
                        err, inc, warn = self.__findPatternInResponse(
                            data, warn)
                    if err != "":
                        self.logVuln(
                            category=Vulnerability.FILE_HANDLING,
                            level=Vulnerability.HIGH_LEVEL,
                            request=evil_req,
                            parameter=param_name,
                            info=_("{0} via injection in the parameter {1}"
                                   ).format(err, param_name))
                        self.logR(Vulnerability.MSG_PARAM_INJECT, err, page,
                                  param_name)
                        self.logR(Vulnerability.MSG_EVIL_URL, evil_req.url)
                        if inc:
                            break
                    else:
                        if code == "500" and err500 == 0:
                            err500 = 1
                            self.logAnom(
                                category=Anomaly.ERROR_500,
                                level=Anomaly.HIGH_LEVEL,
                                request=evil_req,
                                parameter=param_name,
                                info=Anomaly.MSG_PARAM_500.format(param_name))
                            self.logO(Anomaly.MSG_500, evil_req.path)
                            self.logO(Anomaly.MSG_EVIL_URL, evil_req.url)
            params_list[i][1] = saved_value
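
The private __findPatternInResponse helper is not shown in this listing; its contract is to scan the response body for file-handling error signatures and return an (err, inc, warn) triple. A hypothetical stand-in with illustrative patterns:

PATTERNS = [
    # (needle, description, inc flag: 1 means confirmed inclusion)
    ('root:x:0:0', 'File disclosure (/etc/passwd)', 1),
    ('failed to open stream', 'Local file handling error', 0),
]

def find_pattern_in_response(data, warn):
    # err is a description of what matched, inc flags a confirmed
    # inclusion, and warn is a counter threaded through the calls
    for needle, label, inc in PATTERNS:
        if needle in data:
            return label, inc, warn
    return '', 0, warn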
Exemplo n.º 37
0
    def attackGET(self, http_res):
        """This method performs the CRLF attack with method GET"""
        page = http_res.path
        params_list = http_res.get_params
        resp_headers = http_res.headers
        referer = http_res.referer
        headers = {}
        if referer:
            headers["referer"] = referer

        payload = self.HTTP.quote(
            "http://www.google.fr\r\nwapiti: SVN version")
        if not params_list:
            # Do not attack application-type files
            if not "content-type" in resp_headers:
                # Sometimes there's no content-type... so we rely on the document extension
                if (page.split(".")[-1]
                        not in self.allowed) and page[-1] != "/":
                    return
            elif not "text" in resp_headers["content-type"]:
                return

            url = page + "?" + payload
            if url not in self.attackedGET:
                evil_req = HTTP.HTTPResource(url)
                if self.verbose == 2:
                    print(u"+ {0}".format(evil_req.url))
                try:
                    resp = self.HTTP.send(evil_req, headers=headers)
                    if "wapiti" in resp.getHeaders():
                        self.logVuln(category=Vulnerability.CRLF,
                                     level=Vulnerability.HIGH_LEVEL,
                                     request=evil_req,
                                     info=self.MSG_VULN + " " +
                                     _("(QUERY_STRING)"))
                        self.logR(Vulnerability.MSG_QS_INJECT, self.MSG_VULN,
                                  page)
                        self.logR(Vulnerability.MSG_EVIL_URL, url)
                except requests.exceptions.Timeout:
                    self.logAnom(category=Anomaly.RES_CONSUMPTION,
                                 level=Anomaly.MEDIUM_LEVEL,
                                 request=evil_req,
                                 info=self.MSG_VULN + " " +
                                 _("(QUERY_STRING)"))
                    self.logO(Anomaly.MSG_TIMEOUT, page)
                    self.logO(Anomaly.MSG_EVIL_URL, url)
                except requests.exceptions.HTTPError:
                    # print("Error: The server did not understand this request")
                    pass
                self.attackedGET.append(url)
        else:
            for i in range(len(params_list)):
                saved_value = params_list[i][1]
                # the payload is already escaped, see the top of this method
                params_list[i][1] = payload
                param_name = self.HTTP.quote(params_list[i][0])

                url = page + "?" + self.HTTP.encode(params_list)
                if url not in self.attackedGET:
                    self.attackedGET.append(url)
                    evil_req = HTTP.HTTPResource(url)
                    if self.verbose == 2:
                        print(u"+ {0}".format(evil_req.url))
                    try:
                        resp = self.HTTP.send(evil_req, headers=headers)
                        if "wapiti" in resp.getHeaders():
                            self.logVuln(category=Vulnerability.CRLF,
                                         level=Vulnerability.HIGH_LEVEL,
                                         request=evil_req,
                                         parameter=param_name,
                                         info=self.MSG_VULN + " (" +
                                         param_name + ")")
                            self.logR(Vulnerability.MSG_PARAM_INJECT,
                                      self.MSG_VULN, page, param_name)
                            self.logR(Vulnerability.MSG_EVIL_URL, url)
                    except requests.exceptions.Timeout:
                        self.logAnom(category=Anomaly.RES_CONSUMPTION,
                                     level=Anomaly.MEDIUM_LEVEL,
                                     request=evil_req,
                                     parameter=param_name,
                                     info="Timeout (" + param_name + ")")
                        self.logO(Anomaly.MSG_TIMEOUT, page)
                        self.logO(Anomaly.MSG_EVIL_URL, url)
                    except requests.exceptions.HTTPError:
                        self.log(
                            _("Error: The server did not understand this request"
                              ))
                params_list[i][1] = saved_value
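
Detection relies on the server reflecting the injected "\r\nwapiti: SVN version" line as a real response header. A one-function sketch of that check (matching header names case-insensitively is an assumption):

def crlf_injected(resp_headers):
    # if "wapiti" shows up as a response header *name*, the injected
    # CRLF sequence split the header block on the server side
    return any('wapiti' in name.lower() for name in resp_headers)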
Exemplo n.º 38
0
    def attackPOST(self, form):
        """This method performs the Blind SQL attack with method POST"""

        # copies
        get_params = form.get_params
        post_params = form.post_params
        file_params = form.file_params
        referer = form.referer

        for params_list in [get_params, post_params, file_params]:
            for i in xrange(len(params_list)):
                saved_value = params_list[i][1]
                param_name = self.HTTP.quote(params_list[i][0])

                if params_list is file_params:
                    params_list[i][1] = ["_SQL__", params_list[i][1][1]]
                else:
                    params_list[i][1] = "__SQL__"

                attack_pattern = HTTP.HTTPResource(form.path,
                                                   method=form.method,
                                                   get_params=get_params,
                                                   post_params=post_params,
                                                   file_params=file_params)

                if attack_pattern in self.excludedPOST:
                    params_list[i][1] = saved_value
                    continue

                err500 = 0
                if attack_pattern not in self.attackedPOST:
                    self.attackedPOST.append(attack_pattern)
                    for payload in self.blind_sql_payloads:
                        if params_list is file_params:
                            payload = payload.replace("[VALUE]",
                                                      saved_value[0])
                            params_list[i][1][0] = payload.replace(
                                "__TIME__", self.TIME_TO_SLEEP)
                        else:
                            payload = payload.replace("[VALUE]", saved_value)
                            params_list[i][1] = payload.replace(
                                "__TIME__", self.TIME_TO_SLEEP)

                        evil_req = HTTP.HTTPResource(form.path,
                                                     method=form.method,
                                                     get_params=get_params,
                                                     post_params=post_params,
                                                     file_params=file_params,
                                                     referer=referer)

                        if self.verbose == 2:
                            print(u"+ {0}".format(evil_req))
                        try:
                            resp = self.HTTP.send(evil_req)
                            data, code = resp.getPageCode()
                        except requests.exceptions.Timeout:
                            # Timeout means time-based SQL injection
                            self.logVuln(
                                category=Vulnerability.BLIND_SQL_INJECTION,
                                level=Vulnerability.HIGH_LEVEL,
                                request=evil_req,
                                parameter=param_name,
                                info=_("{0} via injection in the "
                                       "parameter {1}").format(
                                           self.MSG_VULN, param_name))
                            self.logR(Vulnerability.MSG_PARAM_INJECT,
                                      self.MSG_VULN, evil_req.url, param_name)
                            self.logR(Vulnerability.MSG_EVIL_REQUEST)
                            self.logC(evil_req.http_repr)
                            print('')
                            break

                        else:
                            if code == "500" and err500 == 0:
                                err500 = 1
                                self.logAnom(category=Anomaly.ERROR_500,
                                             level=Anomaly.HIGH_LEVEL,
                                             request=evil_req,
                                             parameter=param_name,
                                             info=Anomaly.MSG_PARAM_500.format(
                                                 param_name))
                                self.logO(Anomaly.MSG_500, evil_req.url)
                                self.logO(Anomaly.MSG_EVIL_REQUEST)
                                self.logC(evil_req.http_repr)
                                print('')
                params_list[i][1] = saved_value
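
Deduplication in both POST attacks hinges on the attack_pattern trick: the targeted field is replaced by a fixed marker before building the request, so two requests differing only in that field's value compare equal. A hypothetical standalone version:

def attack_signature(path, method, params, index, marker='__SQL__'):
    # normalize the targeted field so the same (form, field) pair
    # is only attacked once, whatever its original value was
    normalized = [(k, marker if i == index else v)
                  for i, (k, v) in enumerate(params)]
    return (path, method, tuple(normalized))

seen = set()
sig = attack_signature('/login.php', 'POST', [('user', 'bob'), ('pw', 'x')], 1)
if sig not in seen:
    seen.add(sig)  # first time: run the payload loop for this field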
Exemplo n.º 39
0
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform
        self.root = os.path.dirname(os.path.dirname(__file__))

    def check_exist(self):
        for libname in get_libname(self.platform):
            if not xbmcvfs.exists(os.path.join(self.dest_path, libname)):
                return False
        return True

    def check_update(self):
        need_update = False
        for libname in get_libname(self.platform):
            if libname != 'liblibtorrent.so':
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.root,
                                             self.platform['system'],
                                             self.platform['version'],
                                             libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                with open(self.sizepath, "r") as f:
                    size_old = f.read()
                if size_old != size:
                    need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        __settings__ = xbmcaddon.Addon(id='plugin.video.alfa')  ### Alfa
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'],
                                       self.platform['version'], libname)
            if libname != 'liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url,
                                    download=dest + ".zip",
                                    progress=False)  ### Alfa
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin(
                        'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path),
                        True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" %
                                        (__plugin__, text, 750, __icon__))
            else:
                xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'),
                             dest,
                             silent=True)  ### Alfa
            dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Path')), \
                            'lib', libname)                                     ### Alfa
            xbmcvfs.copy(dest, dest_alfa, silent=True)  ### Alfa
            dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Profile')), \
                            'custom_code', 'lib', libname)                      ### Alfa
            xbmcvfs.copy(dest, dest_alfa, silent=True)  ### Alfa
        return True

    def android_workaround(self, new_dest_path):  ### Alfa (entire method)
        import subprocess

        for libname in get_libname(self.platform):
            libpath = os.path.join(self.dest_path, libname)
            size = str(os.path.getsize(libpath))
            new_libpath = os.path.join(new_dest_path, libname)

            if xbmcvfs.exists(new_libpath):
                new_size = str(os.path.getsize(new_libpath))
                if size != new_size:
                    xbmcvfs.delete(new_libpath)
                    if xbmcvfs.exists(new_libpath):
                        try:
                            command = ['su', '-c', 'rm', '%s' % new_libpath]
                            p = subprocess.Popen(command,
                                                 stdout=subprocess.PIPE,
                                                 stderr=subprocess.PIPE)
                            output_cmd, error_cmd = p.communicate()
                            log('ROOT command: %s' % str(command))
                        except:
                            log('No ROOT permissions: %s' % str(command))

                    if not xbmcvfs.exists(new_libpath):
                        log('Deleted: (%s) %s -> (%s) %s' %
                            (size, libpath, new_size, new_libpath))

            if not xbmcvfs.exists(new_libpath):
                xbmcvfs.copy(libpath, new_libpath, silent=True)  ### ALFA
                log('Copying... %s -> %s' % (libpath, new_libpath))

                if not xbmcvfs.exists(new_libpath):
                    try:
                        command = [
                            'su', '-c', 'cp',
                            '%s' % libpath,
                            '%s' % new_libpath
                        ]
                        p = subprocess.Popen(command,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)
                        output_cmd, error_cmd = p.communicate()
                        log('ROOT command: %s' % str(command))

                        command = [
                            'su', '-c', 'chmod', '775',
                            '%s' % new_libpath
                        ]
                        p = subprocess.Popen(command,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)
                        output_cmd, error_cmd = p.communicate()
                        log('ROOT command: %s' % str(command))
                    except:
                        log('No ROOT permissions: %s' % str(command))

                    if not xbmcvfs.exists(new_libpath):
                        log('ROOT Copy Failed!')

                else:
                    command = ['chmod', '775', '%s' % new_libpath]
                    p = subprocess.Popen(command,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
                    output_cmd, error_cmd = p.communicate()
                    log('Command: %s' % str(command))
            else:
                log('Module exists. Not copied... %s' % new_libpath)  ### ALFA

        return new_dest_path
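
A hypothetical driver for LibraryManager; the platform dictionary and destination path are illustrative, matching the {'system', 'version'} keys the class reads:

platform = {'system': 'android_armv7', 'version': '1.1.0'}
manager = LibraryManager('/storage/.kodi/userdata/libtorrent', platform)
if not manager.check_exist():
    manager.download()
else:
    manager.update()  # re-downloads when the recorded .size.txt differs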
Exemplo n.º 40
0
class KinoPoisk:
    """

    API:
        scraper  - скрапер
        movie    - профайл фильма
        search   - поиск фильма
        best     - поиск лучших фильмов
        person   - поиск персон
        work     - информация о работах персоны

    """

    def __init__(self):
        self.cache = Cache('kinopoisk.db', 1.0)
        self.html = Clear()

        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }

    # API

    def scraper(self, search, year=None, trailer_quality=None):

        try:
            if isinstance(search, list):
                search = search[0] or ""
            tag = 'scraper:' + urllib.quote_plus(search.encode('windows-1251'))
        except Exception:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id, trailer_quality)

    def movie(self, id, trailer_quality=None):
        id = str(id)

        if trailer_quality is None:
            trailer_quality = 6

        movie = self.cache.get('movie:' + id, self._movie, id)
        if not movie:
            return None

        if 'trailers' in movie and movie['trailers']:
            # build the list of trailers with the quality we want
            video = []
            for m in movie['trailers']:
                url = [x for x in m['video'] if x[0] <= trailer_quality]
                if url:
                    m['video'] = url[-1]
                    video.append(m)

            movie['trailers'] = video

            if movie['trailers']:
                # pick the main trailer
                r = [x for x in movie['trailers'] if x['trailer']]
                if r:
                    movie['info']['trailer'] = r[0]['video'][1]
                else:
                    # if no dedicated trailer is found, return whatever comes first...
                    movie['info']['trailer'] = movie['trailers'][0]['video'][1]

        return movie

    def search(self, name, trailer_quality=None):
        return self._search_movie(name)

    def best(self, **kwarg):
        page = kwarg.get('page', 1)
        limit = kwarg.get('limit', 50)

        url = 'http://www.kinopoisk.ru/top/navigator/m_act%5Bis_film%5D/on/m_act%5Bnum_vote%5D/' + str(kwarg.get('votes', 100)) + '/'

        if kwarg.get('dvd'):
            url += 'm_act%5Bis_dvd%5D/on/'

        if kwarg.get('decade'):
            url += 'm_act%5Bdecade%5D/' + str(kwarg['decade']) + '/'

        if kwarg.get('genre'):
            url += 'm_act%5Bgenre%5D/' + str(GENRE[kwarg['genre']]) + '/'

        if kwarg.get('country'):
            url += 'm_act%5Bcountry%5D/' + str(kwarg['country']) + '/'

        if kwarg.get('rate'):
            url += 'm_act%5Brating%5D/' + str(kwarg['rate']) + ':/'

        if kwarg.get('mpaa'):
            url += 'm_act%5Bmpaa%5D/' + str(kwarg['mpaa']) + '/'

        url += 'perpage/' + str(limit) + '/order/ex_rating/'

        if page > 1:
            url += 'page/' + str(page) + '/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = {'pages': (1, 0, 1, 0), 'data': []}

        r = re.compile('<div class="pagesFromTo(.+?)<div class="pagesFromTo', re.U | re.S).search(response.body.decode('windows-1251'))
        if r:

            body = r.group(1)

            # compile pagelist
            p = re.compile('>([0-9]+)&mdash;[0-9]+[^0-9]+?([0-9]+)', re.U).search(body)
            if p:
                page = (int(p.group(1)) - 1) / limit + 1
                total = int(p.group(2))
                pages = total / limit
                if limit * pages != total:
                    pages += 1
                res['pages'] = (pages, 0 if page == 1 else page - 1, page, 0 if page == pages else page + 1)
            # end compile

            for id in re.compile('<div id="tr_([0-9]+)"', re.U | re.S).findall(body):
                res['data'].append(int(id))

        return res

    def person(self, name):
        response = self.http.fetch('http://www.kinopoisk.ru/s/type/people/list/1/find/' + urllib.quote_plus(name.encode('windows-1251')) + '/order/relevant/', headers=self.headers)
        if response.error:
            return None

        res = []
        body = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search(response.body.decode('windows-1251'))
        if body:

            for block in re.compile('<p class="pic">(.+?)<div class="clear">', re.U | re.S).findall(body.group(1)):

                id, name, original, year, poster = None, None, None, None, None

                r = re.compile('<p class="name"><a href="http://www\.kinopoisk\.ru/level/4/people/([0-9]+)[^>]+>([^<]+)</a>', re.U | re.S).search(block)
                if r:
                    id = r.group(1)
                    name = r.group(2).strip()

                    if id and name:

                        r = re.compile('<span class="gray">([^<]+)</span>', re.U | re.S).search(block)
                        if r:
                            original = r.group(1).strip()
                            if not original:
                                original = None

                        r = re.compile('<span class="year">([0-9]{4})</span>', re.U | re.S).search(block)
                        if r:
                            year = int(r.group(1))

                        if block.find('no-poster.gif') == -1:
                            poster = 'http://st.kinopoisk.ru/images/actor/' + id + '.jpg'

                        res.append({'id': int(id), 'name': name, 'originalname': original, 'year': year, 'poster': poster})

        return {'pages': (1, 0, 1, 0), 'data': res}

    def work(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/name/' + str(id) + '/', headers=self.headers)
        if response.error:
            return None

        res = {}

        r = re.compile('id="sort_block">(.+?)<style>', re.U | re.S).search(response.body.decode('windows-1251'))
        if r:
            for block in r.group(1).split(u'<table cellspacing="0" cellpadding="0" border="0" width="100%">'):
                work = None

                for w in ('actor', 'director', 'writer', 'producer', 'producer_ussr', 'composer', 'operator', 'editor', 'design', 'voice', 'voice_director'):
                    if block.find(u'id="' + w + u'"') != -1:
                        work = 'producer' if w == 'producer_ussr' else w
                        break

                if work:

                    movies = []

                    # collect film ids, skipping titles tagged as a series or mini-series
                    for id, name in re.compile('<span class="name"><a href="/film/([0-9]+)/" >([^<]+?)</a>', re.U).findall(block):
                        for tag in (u'(мини-сериал)', u'(сериал)'):
                            if name.find(tag) != -1:
                                break
                        else:
                            # for/else: runs only when no series tag was found
                            movies.append(int(id))

                    if movies:
                        res.setdefault(work, []).extend(movies)

        return res
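
    # Usage sketch (hypothetical id variable): work() maps a person's page to
    # the movie ids of their filmography, grouped by role:
    #
    #   jobs = kp.work(person_id)
    #   # -> e.g. {'actor': [123, 456], 'director': [789]}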

    def review(self, id, query):
        query_s = 'all' if query == 'stat' else query
        data = self.cache.get('review:' + str(id) + ':' + query_s, self._review, id, query_s)
        if not data:
            return data
        return data[query]
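
    # Usage sketch: 'stat' returns the counters, any other query returns the
    # matching review dicts; both come from the same cached fetch, since the
    # counters are stored inside the 'all' cache entry:
    #
    #   stat = kp.review(film_id, 'stat')   # {'all': n, 'good': g, 'bad': b, 'neutral': m}
    #   good = kp.review(film_id, 'good')   # [{'nick': ..., 'title': ..., 'review': ...}, ...]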

    def countries(self):
        return COUNTRIES

    def country(self, id, default=None):
        country = [x[1] for x in COUNTRIES if x[0] == id]
        return country[0] if country else default
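
    # Example: COUNTRIES is assumed to be a sequence of (id, name) pairs, so
    # country() is a linear lookup with a fallback:
    #
    #   name = kp.country(some_id, default=u'Unknown')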

    # PRIVATE

    def _search_movie(self, name, year=None):
        url = 'http://www.kinopoisk.ru/s/type/film/list/1/find/' + urllib.quote_plus(name.encode('windows-1251'))  # + '/order/relevant'
        if year:
            url += '/m_act%5Byear%5D/' + str(year)
        url += '/m_act%5Btype%5D/film/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = []
        r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search(response.body.decode('windows-1251'))
        if r:
            for id in re.compile('<p class="name"><a href="/level/1/film/([0-9]+)', re.U | re.S).findall(r.group(1)):
                res.append(int(id))

        return {'pages': (1, 0, 1, 0), 'data': res}

    def _scraper(self, name, year):
        timeout = True

        # if the film is recent, cache it only for a SHORT time (the site may still get updates)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # week

        ids = self._search_movie(name, year)

        if ids is None:
            return False, None

        elif not ids['data']:
            # cache the empty result for three days
            return 259200, None

        else:
            return timeout, ids['data'][0]
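
    # Note on the return convention shared by _scraper(), _review() and
    # _movie(): each returns a (timeout, value) pair, presumably consumed by
    # Cache as "store value for timeout seconds", with True meaning "keep
    # indefinitely" and False meaning "do not cache this failure at all".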

    def _review(self, id, query):
        url = 'http://www.kinopoisk.ru/film/' + str(id) + '/ord/rating/'
        if query in ('good', 'bad', 'neutral'):
            url += 'status/' + query + '/'
        url += 'perpage/200/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'stat': {'all': 0, 'good': 0, 'bad': 0, 'neutral': 0},
            query: []
        }

        r = re.compile('<ul class="resp_type">(.+?)</ul>', re.U | re.S).search(html)
        if r:
            ul = r.group(1)

            for q, t in (('pos', 'good'), ('neg', 'bad'), ('neut', 'neutral')):
                r = re.compile('<li class="' + q + '"><a href="[^>]+>[^<]+</a><b>([0-9]+)</b></li>', re.U).search(ul)
                if r:
                    res['stat'][t] = int(r.group(1))

            res['stat']['all'] = res['stat']['good'] + res['stat']['bad'] + res['stat']['neutral']

        r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search(html)
        if r:

            for block in r.group(1).split('itemprop="reviews"'):

                review = {
                    'nick': None,
                    'count': None,
                    'title': None,
                    'review': None,
                    'time': None
                }

                r = re.compile('itemprop="reviewBody">(.+?)</div>', re.U | re.S).search(block)
                if r:

                    text = r.group(1)
                    for tag1, tag2 in ((u'<=end=>', u'\n'), (u'<b>', u'[B]'), (u'</b>', u'[/B]'), (u'<i>', u'[I]'), (u'</i>', u'[/I]'), (u'<u>', u'[U]'), (u'</u>', u'[/U]')):
                        text = text.replace(tag1, tag2)

                    r = self.html.text(text)
                    if r:
                        review['review'] = r

                user = None
                r = re.compile('<p class="profile_name"><s></s><a href="[^>]+>([^<]+)</a></p>').search(block)
                if r:
                    user = self.html.string(r.group(1))
                else:
                    r = re.compile('<p class="profile_name"><s></s>([^<]+)</p>').search(block)
                    if r:
                        user = self.html.string(r.group(1))
                if user:
                    review['nick'] = user

                r = re.compile('<p class="sub_title"[^>]+>([^<]+)</p>').search(block)
                if r:
                    title = self.html.string(r.group(1))
                    if title:
                        review['title'] = title

                r = re.compile('<span class="date">([^<]+)</span>', re.U | re.S).search(block)
                if r:
                    review['time'] = r.group(1).replace(u' |', u',')

                r = re.compile(u'<a href="[^>]+>рецензии \(([0-9]+)\)</a>', re.U | re.S).search(block)
                if r:
                    review['count'] = int(r.group(1))

                if review['nick'] and review['review']:
                    res[query].append(review)

        return 3600, res  # one hour

    def _movie(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/', headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'icon': None,
            'thumbnail': None,
            'info': {
                'count': int(id)
            },
            'properties': {
                'fanart_image': None,
            },
        }

        # title, original title, tagline, MPAA rating, year, top250
        # runtime - film duration (kept as a separate field, otherwise the file size is not visible)
        for tag, reg, t in (
            ('title', '<title>(.+?)</title>', 'str'),
            ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', 'str'),
            ('tagline', '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>', 'str'),
            ('mpaa', 'itemprop="contentRating"\s+content="MPAA\s+([^"]+)"', 'str'),
            ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', 'str'),
            ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', 'int'),
            ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', 'int')

        ):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = value
                    if t == 'int':
                        res['info'][tag] = int(res['info'][tag])
                    else:
                        res['info'][tag] = self.html.string(res['info'][tag])

        # directors, writers, genres
        for tag, reg in (
            ('director', u'<td itemprop="director">(.+?)</td>'),
            ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
            ('genre', u'<td itemprop="genre">(.+?)</td>')
        ):
            r = re.compile(reg, re.U | re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>', re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)

        # actors
        r = re.compile(u'<h4>В главных ролях:</h4><ul>(.+?)</ul>', re.U | re.S).search(html)
        if r:
            actors = []
            for r in re.compile('<li itemprop="actors"><a [^>]+>([^<]+)</a></li>', re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]
                # res['info']['castandrole'] = actors[:]

        # plot description
        r = re.compile('<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>', re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot

        # IMDB
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()

        # # premiere (world)
        # r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U|re.S).search(html)
        # if r:
        #     r = re.compile(u'data\-ical\-date="([^"]+)"', re.U|re.S).search(r.group(1))
        #     if r:
        #         data = r.group(1).split(' ')
        #         if len(data) == 3:
        #             i = 0
        #             for mon in (u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря'):
        #                 i += 1
        #                 if mon == data[1]:
        #                     mon = str(i)
        #                     if len(mon) == 1:
        #                         mon = '0' + mon
        #                     day = data[0]
        #                     if len(day) == 1:
        #                         day = '0' + day
        #                     res['info']['premiered'] = '-'.join([data[2], mon, day])
        #                     break

        # poster
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)', re.U | re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                if poster.startswith("/"):
                    poster = "http://www.kinopoisk.ru%s" % poster
                res['icon'] = poster
                res['thumbnail'] = poster

        menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)</ul>', re.U | re.S).search(html)
        if menu:
            menu = menu.group(1)

            # fanart (wallpapers)
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/wall/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile('<a href="/picture/([0-9]+)/w_size/([0-9]+)/">', re.U).findall(html)
                    if fanart:
                        # sort wallpapers by width, ascending
                        fanart.sort(key=lambda item: int(item[1]))

                        # prefer the largest image no wider than 1280px
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/', headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(1).strip()

            # if there is no fanart (wallpapers), try the stills instead
            if not res['properties']['fanart_image'] and menu.find('/film/' + id + '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/stills/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile('<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>', re.U).findall(html)
                    if fanart:
                        # sort stills by width, ascending
                        fanart.sort(key=lambda item: int(item[1]))

                        # prefer landscape stills no wider than 1280px
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280 and int(x[1]) > int(x[2])]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/', headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(1).strip()

            # # studios
            # if menu.find('/film/' + id + '/studio/') != -1:
            #     response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers)
            #     if not response.error:
            #         html = response.body.decode('windows-1251')
            #         r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U|re.S).search(html)
            #         if r:
            #             studio = []
            #             for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)):
            #                 r = self.html.string(r)
            #                 if r:
            #                     studio.append(r)
            #             if studio:
            #                 res['info']['studio'] = u', '.join(studio)

            # trailers

            # trailers1 = [] # Russian trailers
            # trailers2 = [] # other Russian videos
            # trailers3 = [] # trailers
            # trailers4 = [] # other videos

            # if menu.find('/film/' + id + '/video/') != -1:
            #     response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/video/', headers=self.headers)
            #     if not response.error:
            #         html = response.body.decode('windows-1251')

            #         for row in re.compile(u'<!-- ролик -->(.+?)<!-- /ролик -->', re.U|re.S).findall(html):

            #             # skip irrelevant blocks
            #             if row.find(u'>СМОТРЕТЬ</a>') != -1:

            #                 # is the clip in Russian?
            #                 if row.find('class="flag flag2"') == -1:
            #                     is_ru = False
            #                 else:
            #                     is_ru = True

            #                 # get the trailer name
            #                 r = re.compile('<a href="/film/' + id + '/video/[0-9]+/[^>]+ class="all">(.+?)</a>', re.U).search(row)
            #                 if r:
            #                     name = self.html.string(r.group(1))
            #                     if name:

            #                         trailer = {
            #                             'name': name,
            #                             'time': None,
            #                             'trailer': False,
            #                             'ru': is_ru,
            #                             'video': []
            #                         }

            #                         # trailer or teaser?
            #                         for token in (u'Трейлер', u'трейлер', u'Тизер', u'тизер'):
            #                             if name.find(token) != -1:
            #                                 trailer['trailer'] = True
            #                                 break

            #                         # get the trailer duration
            #                         r = re.compile(u'clock.gif"[^>]+></td>\s*<td style="color\: #777">[^0-9]*([0-9\:]+)</td>', re.U|re.S).search(row)
            #                         if r:
            #                             trailer['time'] = r.group(1).strip()

            #                         # split the clips by quality
            #                         for r in re.compile('trailer/([1-3])a.gif"(.+?)link=([^"]+)" class="continue">.+?<td style="color\:#777">([^<]+)</td>\s*</tr>', re.U|re.S).findall(row):
            #                             quality = int(r[0])
            #                             if r[1].find('icon-hd') != -1:
            #                                 quality += 3

            #                             trailer['video'].append((quality, r[2].strip(), r[3]))

            #                         if trailer['video']:
            #                             if trailer['ru']:
            #                                 if trailer['trailer']:
            #                                     trailers1.append(trailer)
            #                                 else:
            #                                     trailers2.append(trailer)
            #                             else:
            #                                 if trailer['trailer']:
            #                                     trailers3.append(trailer)
            #                                 else:
            #                                     trailers4.append(trailer)

            # # merge the trailer lists, Russian first
            # res['trailers'].extend(trailers1)
            # res['trailers'].extend(trailers2)
            # res['trailers'].extend(trailers3)
            # res['trailers'].extend(trailers4)

        timeout = True
        # if the film is recent, cache it only for a SHORT time (the site may still get updates)
        if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # week

        return timeout, res
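
    # The dict built above follows XBMC/Kodi ListItem conventions: 'info'
    # holds the info-labels (title, year, plot, cast, rating, ...), 'icon'
    # and 'thumbnail' carry the poster, and properties['fanart_image'] the
    # backdrop picked from the wall/stills pages.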
Exemplo n.º 41
0
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform
        self.root = os.path.dirname(os.path.dirname(__file__))

    def check_exist(self):
        for libname in get_libname(self.platform):
            if not xbmcvfs.exists(os.path.join(self.dest_path, libname)):
                return False
        return True

    def check_update(self):
        need_update = False
        for libname in get_libname(self.platform):
            if libname != 'liblibtorrent.so':
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.root,
                                             self.platform['system'],
                                             self.platform['version'],
                                             libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                with open(self.sizepath, "r") as f:
                    size_old = f.read()
                if size_old != size:
                    need_update = True
        return need_update
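
    # Sketch of the convention assumed here: a "<libname>.size.txt" file,
    # shipped next to the addon sources, holds the expected byte size of each
    # library; any mismatch with the installed binary triggers a re-download.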

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'],
                                       self.platform['version'], libname)
            if libname != 'liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url, download=dest + ".zip", progress=True)
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin(
                        'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path),
                        True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" %
                                        (__plugin__, text, 750, __icon__))
            else:
                xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'),
                             dest)
        return True

    def android_workaround(self, new_dest_path):
        for libname in get_libname(self.platform):
            libpath = os.path.join(self.dest_path, libname)
            size = str(os.path.getsize(libpath))
            new_libpath = os.path.join(new_dest_path, libname)

            if not xbmcvfs.exists(new_libpath):
                xbmcvfs.copy(libpath, new_libpath)
                log('Copied %s -> %s' % (libpath, new_libpath))
            else:
                new_size = str(os.path.getsize(new_libpath))
                if size != new_size:
                    xbmcvfs.delete(new_libpath)
                    xbmcvfs.copy(libpath, new_libpath)
                    log('Deleted and copied (%s) %s -> (%s) %s' %
                        (size, libpath, new_size, new_libpath))
        return new_dest_path
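
A minimal driver for the class above (hypothetical: dest_path, platform and
new_dest_path stand in for whatever the host addon provides):

manager = LibraryManager(dest_path, platform)
if not manager.check_exist():
    manager.download()   # first install: fetch every library for this platform
else:
    manager.update()     # re-download only when the size files report a mismatch
# On Android the libraries may also need to live in an executable location:
new_path = manager.android_workaround(new_dest_path)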
Exemplo n.º 42
0
            # Search for permanent (stored) XSS vulnerabilities that were injected via GET
            if self.doGET == 1:
                for code in self.GET_XSS:
                    if code in data:
                        # code found in the webpage!
                        code_url = self.GET_XSS[code][0].url
                        page = self.GET_XSS[code][0].path
                        param_name = self.GET_XSS[code][1]
                        if code in self.SUCCESSFUL_XSS:
                            # is this an already-known vulnerability (reflected XSS)?
                            if self.validXSS(data, code,
                                             self.SUCCESSFUL_XSS[code]):
                                # if we can find the payload again, this is a stored XSS
                                evil_req = HTTP.HTTPResource(
                                    code_url.replace(
                                        code, self.SUCCESSFUL_XSS[code]))

                                if param_name == "QUERY_STRING":
                                    self.logR(Vulnerability.MSG_QS_INJECT,
                                              self.MSG_VULN, page)
                                else:
                                    self.logR(Vulnerability.MSG_PARAM_INJECT,
                                              self.MSG_VULN, page, param_name)
                                self.logR(Vulnerability.MSG_EVIL_URL, code_url)

                                self.logVuln(
                                    category=Vulnerability.XSS,
                                    level=Vulnerability.HIGH_LEVEL,
                                    request=evil_req,
                                    info=_("Found permanent XSS in {0}"