def __loadRemote(self, inputList, lItem): try: inputList.curr_url = lItem['url'] count = 0 i = 1 maxits = 2 # 1 optimistic + 1 demystified ignoreCache = False demystify = False back = '' startUrl = inputList.curr_url #print inputList, lItem while count == 0 and i <= maxits: if i > 1: ignoreCache = True demystify = True # Trivial: url is from known streamer if back: lItem['referer'] = back items = self.__parseHtml(inputList.curr_url, '"' + inputList.curr_url + '"', inputList.rules, inputList.skill, inputList.cfg, lItem) count = len(items) # try to find items in html source code if count == 0: referer = '' if lItem['referer']: referer = lItem['referer'] data = common.getHTML(inputList.curr_url, None, referer, False, False, ignoreCache, demystify) if data == '': return False msg = 'Remote URL ' + inputList.curr_url + ' opened' if demystify: msg += ' (demystified)' common.log(msg) if inputList.section != '': section = inputList.section data = self.__getSection(data, section) if lItem['section']: section = lItem['section'] data = self.__getSection(data, section) items = self.__parseHtml(inputList.curr_url, data, inputList.rules, inputList.skill, inputList.cfg, lItem) count = len(items) common.log(' -> ' + str(count) + ' item(s) found') # find rtmp stream common.log('Find rtmp stream') if count == 0: item = self.__findRTMP(data, startUrl, lItem) if item: items = [] items.append(item) count = 1 # find embedding javascripts common.log('Find embedding javascripts') if count == 0: item = findJS(data) if item: firstJS = item[0] streamId = firstJS[0] jsUrl = firstJS[1] if not jsUrl.startswith('http://'): jsUrl = urllib.basejoin(startUrl,jsUrl) streamerName = getHostName(jsUrl) jsSource = getHTML(jsUrl, None, startUrl) phpUrl = findPHP(jsSource, streamId) if phpUrl: data = getHTML(phpUrl, None, startUrl) item = self.__findRTMP(data, phpUrl, lItem) if item: if streamerName: item['title'] = item['title'].replace('RTMP', streamerName) items = [] items.append(item) count = 1 else: red = phpUrl common.log(' -> Redirect: ' + red) if back == red: break back = inputList.curr_url inputList.curr_url = red common.log(str(len(inputList.items)) + ' items ' + inputList.cfg + ' -> ' + red) startUrl = red continue # find redirects common.log('find redirects') if count == 0: red = self.__findRedirect(startUrl, inputList.curr_url) if startUrl == red: common.log(' -> No redirect found') else: common.log(' -> Redirect: ' + red) if back == red: break back = inputList.curr_url inputList.curr_url = red common.log(str(len(inputList.items)) + ' items ' + inputList.cfg + ' -> ' + red) startUrl = red i = 0 i += 1 if count != 0: inputList.items = inputList.items + items except: traceback.print_exc(file = sys.stdout) return False return True
def __loadRemote(self, inputList, lItem): try: inputList.curr_url = lItem['url'] count = 0 i = 1 maxits = 2 # 1 optimistic + 1 demystified ignoreCache = False demystify = False startUrl = inputList.curr_url #print inputList, lItem while count == 0 and i <= maxits: if i > 1: ignoreCache = True demystify = True # Trivial: url is from known streamer items = self.__parseHtml(inputList.curr_url, '"' + inputList.curr_url + '"', inputList.rules, inputList.skill, inputList.cfg, lItem) count = len(items) # try to find items in html source code if count == 0: referer = '' if lItem['referer']: referer = lItem['referer'] data = common.getHTML(inputList.curr_url, referer, ignoreCache, demystify) if data == '': return False msg = 'Remote URL ' + str(inputList.curr_url) + ' opened' if demystify: msg += ' (demystified)' common.log(msg) if inputList.section != '': section = inputList.section data = self.__getSection(data, section) if lItem['section']: section = lItem['section'] data = self.__getSection(data, section) items = self.__parseHtml(inputList.curr_url, data, inputList.rules, inputList.skill, inputList.cfg, lItem) count = len(items) common.log(' -> ' + str(count) + ' item(s) found') # find rtmp stream #common.log('Find rtmp stream') if count == 0: item = self.__findRTMP(data, startUrl, lItem) if item: items = [] items.append(item) count = 1 # find embedding javascripts #common.log('Find embedding javascripts') if count == 0: item = findJS(data) if item: firstJS = item[0] streamId = firstJS[0] jsUrl = firstJS[1] streamerName = getHostName(jsUrl) jsSource = getHTML(jsUrl, startUrl, True, False) phpUrl = findPHP(jsSource, streamId) if phpUrl: data = getHTML(phpUrl, startUrl, True, True) item = self.__findRTMP(data, phpUrl, lItem) if item: if streamerName: item['title'] = item['title'].replace( 'RTMP', streamerName) items = [] items.append(item) count = 1 # find vcods #common.log('find vcods') if count == 0: vcods = findVCods(data) if vcods: sUrl = vcods[0] cod1 = vcods[1] cod2 = vcods[2] swfUrl = vcods[3] unixTS = str(dt.getUnixTimestamp()) sUrl = sUrl + '?callback=jQuery1707757964063647694_1347894980192&v_cod1=' + cod1 + '&v_cod2=' + cod2 + '&_=' + unixTS tmpData = getHTML(sUrl, urllib.unquote_plus(startUrl), True, False) if tmpData and tmpData.find("Bad Request") == -1: newReg = '"result1":"([^\"]+)","result2":"([^\"]+)"' link = regexUtils.findall(tmpData, newReg) if link: _file = link[0][0] rtmp = link[0][1].replace('\\', '') #.replace('/redirect','/vod') item = CListItem() item['title'] = getHostName( sUrl) + '* - ' + _file item['type'] = 'video' item[ 'url'] = rtmp + ' playPath=' + _file + ' swfUrl=' + swfUrl + ' swfVfy=1 live=true pageUrl=' + startUrl item.merge(lItem) items.append(item) count = 1 # find redirects #common.log('find redirects') if count == 0: red = self.__findRedirect(startUrl, inputList.curr_url) if startUrl == red: common.log(' -> No redirect found') else: common.log(' -> Redirect: ' + red) inputList.curr_url = red common.log( str(len(inputList.items)) + ' items ' + inputList.cfg + ' -> ' + red) startUrl = red if lItem['referer']: lItem['referer'] = red i = 0 i += 1 if count != 0: inputList.items = inputList.items + items except IOError: if common.enable_debug: traceback.print_exc(file=sys.stdout) return False return True
def __loadRemote(self, inputList, lItem): try: inputList.curr_url = lItem['url'] count = 0 i = 1 maxits = 2 # 1 optimistic + 1 demystified ignoreCache = False demystify = False startUrl = inputList.curr_url #print inputList, lItem while count == 0 and i <= maxits: if i > 1: ignoreCache = True demystify = True # Trivial: url is from known streamer items = self.__parseHtml(inputList.curr_url, '"' + inputList.curr_url + '"', inputList.rules, inputList.skill, inputList.cfg, lItem) count = len(items) # try to find items in html source code if count == 0: referer = '' if lItem['referer']: referer = lItem['referer'] inputList.curr_url = HTMLParser.HTMLParser().unescape(urllib.unquote(inputList.curr_url)) data = common.getHTML(inputList.curr_url, None, referer, ignoreCache, demystify) if data == '': return False msg = 'Remote URL ' + str(inputList.curr_url) + ' opened' if demystify: msg += ' (demystified)' common.log(msg) if inputList.section != '': section = inputList.section data = self.__getSection(data, section) if lItem['section']: section = lItem['section'] data = self.__getSection(data, section) items = self.__parseHtml(inputList.curr_url, data, inputList.rules, inputList.skill, inputList.cfg, lItem) count = len(items) common.log(' -> ' + str(count) + ' item(s) found') # find rtmp stream #common.log('Find rtmp stream') if count == 0: item = self.__findRTMP(data, startUrl, lItem) if item: items = [] items.append(item) count = 1 # find embedding javascripts #common.log('Find embedding javascripts') if count == 0: item = findJS(data) if item: firstJS = item[0] streamId = firstJS[0] jsUrl = firstJS[1] streamerName = getHostName(jsUrl) jsSource = getHTML(jsUrl, None, startUrl, True, False) phpUrl = findPHP(jsSource, streamId) if phpUrl: data = getHTML(phpUrl, None, startUrl, True, True) item = self.__findRTMP(data, phpUrl, lItem) if item: if streamerName: item['title'] = item['title'].replace('RTMP', streamerName) items = [] items.append(item) count = 1 else: red = phpUrl common.log(' -> Redirect: ' + red) inputList.curr_url = red common.log(str(len(inputList.items)) + ' items ' + inputList.cfg + ' -> ' + red) startUrl = red if lItem['referer']: lItem['referer'] = red continue # find vcods #common.log('find vcods') if count == 0: vcods = findVCods(data) if vcods: sUrl = vcods[0] cod1 = vcods[1] cod2 = vcods[2] swfUrl = vcods[3] unixTS = str(dt.getUnixTimestamp()) sUrl = sUrl + '?callback=jQuery1707757964063647694_1347894980192&v_cod1=' + cod1 + '&v_cod2=' + cod2 + '&_=' + unixTS tmpData = getHTML(sUrl, None, urllib.unquote_plus(startUrl), True, False) if tmpData and tmpData.find("Bad Request") == -1: newReg = '"result1":"([^\"]+)","result2":"([^\"]+)"' link = regexUtils.findall(tmpData, newReg) if link: _file = link[0][0] rtmp = link[0][1].replace('\\','') #.replace('/redirect','/vod') item = CListItem() item['title'] = getHostName(sUrl) + '* - ' + _file item['type'] = 'video' item['url'] = rtmp + ' playPath=' + _file + ' swfUrl=' + swfUrl +' swfVfy=1 live=true pageUrl=' + startUrl item.merge(lItem) items.append(item) count = 1 # find redirects #common.log('find redirects') if count == 0: red = self.__findRedirect(startUrl, inputList.curr_url) if startUrl == red: common.log(' -> No redirect found') else: red = HTMLParser.HTMLParser().unescape(red) red = urllib.unquote(red) common.log(' -> Redirect: ' + red) inputList.curr_url = red common.log(str(len(inputList.items)) + ' items ' + inputList.cfg + ' -> ' + red) startUrl = red if lItem['referer']: lItem['referer'] = red i = 0 i += 1 if count != 0: inputList.items = inputList.items + items except IOError: if common.enable_debug: traceback.print_exc(file = sys.stdout) return False return True