def __init__(self):
    self.userAgent = 'OobaCacheMgr/' + Version
    self.urlContext = ''
    self.socketTimeout = 7.0

    self.cacheFolder = 'Z:\\~HttpCache\\'
    if Emulating:
        self.cacheFolder = ScriptPath + 'Cache\\'
    try:
        os.makedirs(self.cacheFolder)
    except:
        pass

    # The tilde is there to ensure that url2xfilename doesn't create a file
    # that might overwrite this one
    self.cookiefile = self.cacheFolder + '~cookies.txt'
    self.defaultCachetime = 24 * 60.0  # minutes

    self.cookies = ClientCookie.LWPCookieJar()
    try:
        self.cookies.revert(self.cookiefile)
    except:
        print('Could not open cookie file: ' + self.cookiefile)

    hh = CustomHandler(self.cookies)
    self.opener = ClientCookie.build_opener(hh)
def set_up_cookie_stuff():
    COOKIEFILE = './cookies.txt'
    cj = None
    ClientCookie = None
    cookielib = None
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()

    if cj is not None:
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
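# A minimal usage sketch for the helper above (the URL is a placeholder).
# Note that the urlopen/Request names bound inside set_up_cookie_stuff() are
# locals and are lost on return; only the opener registered through
# install_opener() persists, so plain urllib2.urlopen() picks up the cookies:
#
#   set_up_cookie_stuff()
#   html = urllib2.urlopen('http://example.com/').read()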
def __init__(self, url, debug=False):
    UserDict.__init__(self)
    self['url'] = url
    self['COOKIEFILE'] = 'freemed-cookies.lwp'

    if debug:
        import logging
        logger = logging.getLogger("cookielib")
        logger.addHandler(logging.StreamHandler(sys.stdout))
        logger.setLevel(logging.DEBUG)

    cj = None
    ClientCookie = None
    cookielib = None
    try:
        import cookielib
    except ImportError:
        pass
    else:
        import urllib2
        urlopen = urllib2.urlopen
        cj = cookielib.LWPCookieJar()
        Request = urllib2.Request

    if not cookielib:
        try:
            import ClientCookie
        except ImportError:
            import urllib2
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            cj = ClientCookie.LWPCookieJar()
            Request = ClientCookie.Request

    if cj is not None:
        if os.path.isfile(self['COOKIEFILE']):
            if debug:
                print 'DEBUG: Loading cookiefile ' + self['COOKIEFILE']
            cj.load(self['COOKIEFILE'])
        if cookielib:
            opener = urllib2.build_opener(
                urllib2.HTTPCookieProcessor(cj),
                MultipartPostHandler.MultipartPostHandler)
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj),
                MultipartPostHandler.MultipartPostHandler)
            ClientCookie.install_opener(opener)

    self['Request'] = Request
    self['urlopen'] = urlopen
    self['cj'] = cj
def __init__(self):
    # The login is an AJAX POST before we have any cookies; that's what made
    # this code annoying to write. It should work against either cookielib
    # or ClientCookie, whichever one you have.
    try:
        import cookielib
        # We're taking references to functions/objects here so later on we
        # don't need to worry about which actual import we used.
        self.Request = urllib2.Request
        self.urlopen = urllib2.urlopen
        cookie_jar = cookielib.LWPCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
        urllib2.install_opener(opener)
    except ImportError:
        try:
            import ClientCookie
            self.Request = ClientCookie.Request
            self.urlopen = ClientCookie.urlopen
            cookie_jar = ClientCookie.LWPCookieJar()
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cookie_jar))
            # Install the opener so ClientCookie.urlopen actually uses it
            ClientCookie.install_opener(opener)
        except ImportError:
            raise ImportError("This code is dependent on either 'cookielib' "
                              "or 'ClientCookie' and you have neither.")
    self.user = None
def downloadpagewithcookies(url):
    # Initialize the cookie library
    ficherocookies = os.path.join(config.DATA_PATH, 'cookies.lwp')
    print "Cookiefile=" + ficherocookies

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        # LWPCookieJar is a FileCookieJar subclass with useful load/save methods
        cj = cookielib.LWPCookieJar()

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # If we have a cookie file already saved,
        # load the cookies into the Cookie Jar
        if os.path.isfile(ficherocookies):
            cj.load(ficherocookies)
        # Install the cookie-aware opener for fetching URLs
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    theurl = url
    #txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #             'Referer':'http://www.megavideo.com/?s=signup'}
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Host': 'www.meristation.com',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive': '300',
        'Connection': 'keep-alive'
    }  # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()
    return data
def geturl(urlvideo): xbmc.output("[divxlink.py] url=" + urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! 
txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)' } # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner patron = 'http\:\/\/www\.divxlink\.com/([^\/]+)/(.*?)\.html' matches = re.compile(patron, re.DOTALL).findall(url) xbmc.output("[divxlink.py] fragmentos de la URL") scrapertools.printMatches(matches) codigo = "" nombre = "" if len(matches) > 0: codigo = matches[0][0] nombre = matches[0][1] patron = '<input type="hidden" name="rand" value="([^"]+)">' matches = re.compile(patron, re.DOTALL).findall(data) #scrapertools.printMatches(matches) randomstring = "" if len(matches) > 0: randomstring = matches[0] xbmc.output("[divxlink.py] randomstring=" + randomstring) txdata = "op=download2&id=" + codigo + "&rand=" + randomstring + "&referer=&method_free=&method_premium=&down_direct=1" xbmc.output(txdata) req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data patron = '<div id="embedcontmvshre"[^>]+>(.*?)</div>' matches = re.compile(patron, re.DOTALL).findall(data) #scrapertools.printMatches(matches) data = "" if len(matches) > 0: data = matches[0] xbmc.output("[divxlink.py] bloque packed=" + data) else: return "" # Lo descifra descifrado = unpackerjs.unpackjs(data) xbmc.output("descifrado=" + descifrado) # Extrae la URL patron = '<param name="src"value="([^"]+)"/>' matches = re.compile(patron, re.DOTALL).findall(descifrado) scrapertools.printMatches(matches) url = "" if len(matches) > 0: url = matches[0] xbmc.output("[divxlink.py] url=" + url) return url
def getmegauploaduser(login, password):
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    # The path and filename to save your cookies in
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        # LWPCookieJar is a FileCookieJar subclass with useful load/save methods
        cj = cookielib.LWPCookieJar()

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        if os.path.isfile(ficherocookies):
            cj.load(ficherocookies)
        # Install the cookie-aware opener for fetching URLs
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    url = "http://www.megaupload.com/?c=login"
    theurl = url

    passwordesc = password.replace("&", "%26")
    # NOTE: the credential values were redacted in the original source; the
    # POST body is rebuilt here from the variables this function itself defines
    txdata = "login=1&redir=1&username=" + login + "&password=" + passwordesc
    txheaders = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'
    }

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    # Read back the saved cookie file to inspect the megaupload "user" cookie
    cookiedata = open(ficherocookies).read()
    '''
    xbmc.output("----------------------")
    xbmc.output("Cookies despues")
    xbmc.output("----------------------")
    xbmc.output(cookiedata)
    xbmc.output("----------------------")
    '''
    patronvideos = 'user="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
    if len(matches) == 0:
        patronvideos = 'user=([^\;]+);'
        matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)

    if len(matches) == 0:
        if DEBUG:
            xbmc.output("No se ha encontrado la cookie de Megaupload")
            xbmc.output("----------------------")
            xbmc.output("Respuesta de Megaupload")
            xbmc.output("----------------------")
            xbmc.output(data)
            xbmc.output("----------------------")
            xbmc.output("----------------------")
            xbmc.output("Cookies despues")
            xbmc.output("----------------------")
            xbmc.output(cookiedata)
            xbmc.output("----------------------")
        devuelve = ""
    else:
        devuelve = matches[0]
    return devuelve
def __init__(self, cookiesUrl,
             userAgent='Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'):
    self.cookiesUrl = cookiesUrl  # the path and filename to save your cookies in
    self.userAgent = {'User-agent': userAgent}
    self.cj = None
    ClientCookie = None
    cookielib = None
    self.httpsForm = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            self.urlopen = urllib2.urlopen
            self.Request = urllib2.Request
        else:
            self.urlopen = ClientCookie.urlopen
            self.Request = ClientCookie.Request
            self.cj = ClientCookie.LWPCookieJar()
    else:
        self.urlopen = urllib2.urlopen
        self.Request = urllib2.Request
        # LWPCookieJar is a FileCookieJar subclass with useful load/save methods
        self.cj = cookielib.LWPCookieJar()

    if self.cj is not None:
        # If we have a cookie file already saved,
        # load the cookies into the Cookie Jar
        if os.path.isfile(self.cookiesUrl):
            self.cj.load(self.cookiesUrl)
        # Install the cookie-aware opener for fetching URLs
        if cookielib is not None:
            opener = urllib2.build_opener(
                urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(self.cj))
            ClientCookie.install_opener(opener)
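# Hypothetical usage of the wrapper above (the class name is assumed, since
# only its __init__ is shown; the URL is a placeholder):
#
#   client = CookieClient('cookies.lwp')
#   req = client.Request('http://example.com/', None, client.userAgent)
#   html = client.urlopen(req).read()
#   client.cj.save(client.cookiesUrl)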
def geturl(urlvideo): logger.info("[metadivx.py] url="+urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url=urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! 
txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = {'User-Agent':'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'} # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner patron = 'http\:\/\/www\.metadivx\.com/([^\/]+)/(.*?)\.html' matches = re.compile(patron,re.DOTALL).findall(url) logger.info("[metadivx.py] fragmentos de la URL") scrapertools.printMatches(matches) codigo = "" nombre = "" if len(matches)>0: codigo = matches[0][0] nombre = matches[0][1] txdata = "op=download1&usr_login=&id="+codigo+"&fname="+nombre+"&referer=&method_free=Continue" logger.info(txdata) req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data patron = '<div id="embedcontmvshre[^>]+>(.*?)</div>' matches = re.compile(patron,re.DOTALL).findall(data) scrapertools.printMatches(matches) logger.info("[metadivx.py] bloque packed") if len(matches)>0: logger.info(matches[0]) ''' <center> <script type='text/javascript'>eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5tcd6dva|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|'))) </script> </center> ''' # El javascript empaquetado es #eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|'))) ''' eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\' <7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"> <2 1="j"0="i"> <2 1="v"0="u"> <2 1="b"0="5"/> <2 1="c"0="5"/> <2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/> <8 
w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"> <embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"> </8> </7>\');',36,51, 0'value 1|name 2|param 3|com 4|http 5|false 6|divx 7|object 8|embed 9|plugin a|go b|bannerEnabled c|autoPlay d| e|320px f|height g|630px h|width i|none j|custommode k|avi l|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_ m|Capitancinema n|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa o|182 p|206 q|45 r|73 s|76 t|src u|auto v|bufferingMode w|id x|download y|pluginspage z|video 10|type 11|embedmvshre 12|cab 13|DivXBrowserPlugin 14|codebase 15|CC0F21721616 16|9C46 17|41fa 18|D0AB 19|67DABFBF 1a|clsid 1b|classid 1c|embedcontmvshre 1d|write 1e|document '.split(' |'))) ''' # El javascript desempaquetado es #document.write('<object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"><param name="custommode"value="none"><param name="bufferingMode"value="auto"><param name="bannerEnabled"value="false"/><param name="autoPlay"value="false"/><param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/><embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"></embed></object>'); ''' <object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"> <param name="custommode"value="none"> <param name="bufferingMode"value="auto"> <param name="bannerEnabled"value="false"/> <param name="autoPlay"value="false"/> <param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/> <embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"> </embed> </object>'); ''' # La URL del video es #http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi # Lo descifra descifrado = unpackerjs.unpackjs(data) logger.info("descifrado="+descifrado) # Extrae la URL patron = '<param name="src"value="([^"]+)"/>' matches = re.compile(patron,re.DOTALL).findall(descifrado) scrapertools.printMatches(matches) url = "" if len(matches)>0: url = matches[0] logger.info("[metadivx.py] url="+url) return url
def do_login_and_fetch(self, cj, COOKIEFILE, LOGIN_URL, login_params,
                       fetch_url, save_to, **args):
    """
    Do an automated login and save the cookie. This is required for
    presentation download.
    """
    ClientCookie = None
    cookielib = None
    # Properly import the correct cookie lib
    try:
        import http.cookiejar
    except ImportError:
        # If importing http.cookiejar fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib.request.urlopen
            Request = urllib.request.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # Remember which module we got, so the branch below can test it
        cookielib = http.cookiejar
        urlopen = urllib.request.urlopen
        Request = urllib.request.Request
        cj = http.cookiejar.LWPCookieJar()

    if cj is not None:
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib is not None:
            opener = urllib.request.build_opener(
                urllib.request.HTTPCookieProcessor(cj))
            urllib.request.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    headers = {
        'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    }
    request = Request(LOGIN_URL, login_params, headers)
    handle = urlopen(request)
    if cj:
        cj.save(COOKIEFILE)

    request = Request(fetch_url, None, headers)
    try:
        handle = urlopen(request)
    except urllib.error.HTTPError:
        print('Presentation not available for download!', file=sys.stderr)
        return

    data = handle.read()
    info = handle.info()
    content_type = info['Content-Type']
    ext = self.get_extension(content_type)
    if not save_to:
        save_to = fetch_url.split('/')[-2] + '.'
    save_to = save_to + ext
    fp = open(save_to, 'wb')
    fp.write(data)
    fp.close()
    if self.verbose:
        print('Presentation downloaded and saved to %s' % save_to)
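# A stdlib-only sketch of the same login-and-fetch flow for modern Python 3,
# where http.cookiejar is always present and the ClientCookie fallback is no
# longer needed (function and argument names here are illustrative, not part
# of the original module):
import os
import http.cookiejar
import urllib.request

def login_and_fetch(cookie_file, login_url, login_params, fetch_url):
    cj = http.cookiejar.LWPCookieJar()
    if os.path.isfile(cookie_file):
        cj.load(cookie_file)
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cj))
    # POST the login form; urlopen data must be bytes in Python 3
    opener.open(login_url, login_params.encode('utf-8')).close()
    cj.save(cookie_file)
    with opener.open(fetch_url) as handle:
        return handle.read()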
def GetMegavideoUser(login, password, megavidcookiepath):
    # New login code derived from old code by Voinage etc.
    # Removes the need for the mechanize module.

    # If no user or pass are provided, open the login file to get them.
    if login is False or password is False:
        if os.path.exists(megavidcookiepath):
            loginf = openfile(self.login)
            login = get_user(loginf)
            password = get_pass(loginf)

    # ---------------------------------------
    # Cookie stuff
    # ---------------------------------------
    # The path and filename to save your cookies in
    ficherocookies = megavidcookiepath

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        # LWPCookieJar is a FileCookieJar subclass with useful load/save methods
        cj = cookielib.LWPCookieJar()

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        if os.path.isfile(ficherocookies):
            cj.load(ficherocookies)
        # Install the cookie-aware opener for fetching URLs
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    url = "http://www.megavideo.com/?s=signup"
    theurl = url

    # NOTE: the credential value was redacted in the original source; the
    # POST body is rebuilt here from the function's own arguments
    txdata = ("action=login&cnext=&snext=&touser=&user=&nickname=" + login +
              "&password=" + password)
    txheaders = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'
    }

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)
    data = handle.read()
    handle.close()

    # Read back the saved cookie file to inspect the megavideo "user" cookie
    cookiedata = open(ficherocookies).read()

    patronvideos = 'user="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
    if len(matches) == 0:
        patronvideos = 'user=([^\;]+);'
        matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
        if len(matches) == 0:
            print 'something bad happened'
    return matches[0]
def geturl(urlvideo): xbmc.output("[gigabyupload.py] url=" + urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)' } # fake a user agent, some websites (like google) don't like automated exploration try: req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() except: data = "" pass #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner patron = 'http\:\/\/www\.gigabyteupload\.com/download\-([^\-]+)\-.*?' 
matches = re.compile(patron, re.DOTALL).findall(url) id = matches[0] patron = '<form method="post" action="([^"]+)">[^<]+<input type="hidden" name="security_key" value="([^"]+)" \/>' #patron += '<p><input type="submit" name="submit" value="([^"]+)" class="cbutton" \/>' matches = re.compile(patron, re.DOTALL).findall(data) xbmc.output("[gigabyupload.py] fragmentos de la URL : " + str(len(matches))) scrapertools.printMatches(matches) cecid = "" submit = "" url2 = theurl if len(matches) > 0: url2 = matches[0][0] #id = matches[0][5] cecid = matches[0][1] submit = "Watch Online" #aff = matches[0][3] #came_from = matches[0][4] txdata = "op=download&usr_login=&id=" + id + "&security_key=" + cecid + "&submit=" + submit + "&aff=&came_from=referer=&method_free=Free+Stream" xbmc.output(txdata) try: req = Request(url2, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data except: data = "" pass # Extrae el trozo cifrado patron = '<div id="player">[^<]+<script type="text/javascript">(eval.*?)</script>' matches = re.compile(patron, re.DOTALL).findall(data) scrapertools.printMatches(matches) data = "" if len(matches) > 0: data = matches[0] xbmc.output("[Gigabyteupload.py] bloque packed=" + data) else: return "" # Lo descifra descifrado = unpackerjs2.unpackjs(data) # Extrae la URL del vídeo xbmc.output("descifrado=" + descifrado) # Extrae la URL patron = '<param name="src" value="([^"]+)"' matches = re.compile(patron, re.DOTALL).findall(descifrado) scrapertools.printMatches(matches) url = "" if len(matches) > 0: url = matches[0] xbmc.output("[gigabyteupload.py] url=" + url) return url
def geturl(urlvideo): xbmc.output("[vk.py] url=" + urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = urlvideo.replace("&", "&") #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)' } # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data # Extrae la URL print data regexp = re.compile(r'vkid=([^\&]+)\&') match = regexp.search(data) vkid = "" print 'match %s' % str(match) if match is not None: vkid = match.group(1) else: print "no encontro vkid" patron = "var video_host = '([^']+)'.*?" patron += "var video_uid = '([^']+)'.*?" patron += "var video_vtag = '([^']+)'.*?" patron += "var video_no_flv = ([^;]+);.*?" 
patron += "var video_max_hd = '([^']+)'" matches = re.compile(patron, re.DOTALL).findall(data) if len(matches) > 0: for match in matches: if match[3].strip() == "0" and match[1] != "0": tipo = "flv" if "http://" in match[0]: videourl = "%s/u%s/video/%s.%s" % (match[0], match[1], match[2], tipo) else: videourl = "http://%s/u%s/video/%s.%s" % ( match[0], match[1], match[2], tipo) elif match[ 1] == "0" and vkid != "": #http://447.gt3.vkadre.ru/assets/videos/2638f17ddd39-75081019.vk.flv tipo = "flv" if "http://" in match[0]: videourl = "%s/assets/videos/%s%s.vk.%s" % ( match[0], match[2], vkid, tipo) else: videourl = "http://%s/assets/videos/%s%s.vk.%s" % ( match[0], match[2], vkid, tipo) else: #http://cs12385.vkontakte.ru/u88260894/video/d09802a95b.360.mp4 tipo = "360.mp4" if match[0].endswith("/"): videourl = "%su%s/video/%s.%s" % (match[0], match[1], match[2], tipo) else: videourl = "%s/u%s/video/%s.%s" % (match[0], match[1], match[2], tipo) return videourl
def geturl(urlvideo):
    videoid = urlvideo
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    # The path and filename to save your cookies in
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    #xbmc.output("ficherocookies %s", ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        # LWPCookieJar is a FileCookieJar subclass with useful load/save methods
        cj = cookielib.LWPCookieJar()

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        if os.path.isfile(ficherocookies):
            cj.load(ficherocookies)
        # Install the cookie-aware opener for fetching URLs
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    url = "http://www.vimeo.com/moogaloop/load/clip:%s/local/" % videoid
    theurl = url

    txdata = None  # for a POST we could encode a dictionary of values here with urllib.urlencode(somedict)
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Referer': 'http://vimeo/%s' % urlvideo
    }  # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    #cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()
    print data

    # Parse the XML looking for the signature code
    dom = parseString(data)
    xml = dom.getElementsByTagName("xml")
    for node in xml:
        try:
            request_signature = getNodeValue(
                node, "request_signature", "Unknown Uploader").encode("utf-8")
            request_signature_expires = getNodeValue(
                node, "request_signature_expires",
                "Unknown Uploader").encode("utf-8")
        except:
            logger.info("Error : Video borrado")
            return ""

    try:
        quality = ((config.getSetting("quality_flv") == "1" and "hd") or "sd")
    except:
        quality = "sd"
    video_url = "http://www.vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (
        videoid, request_signature, request_signature_expires, quality)
    print video_url

    # Socket timeout (10 seconds)
    socket.setdefaulttimeout(10)
    h = urllib2.HTTPHandler(debuglevel=0)
    request = urllib2.Request(video_url)
    opener = urllib2.build_opener(h)
    urllib2.install_opener(opener)
    try:
        connexion = opener.open(request)
        video_url = connexion.geturl()
    except urllib2.HTTPError, e:
        xbmc.output("[vimeo.py] error %d (%s) al abrir la url %s" %
                    (e.code, e.msg, video_url))
        print e.read()
    return video_url
def downloadpageGzip(url):
    # Initialize the cookie library
    ficherocookies = os.path.join(config.DATA_PATH, 'cookies.lwp')
    print "Cookiefile=" + ficherocookies
    inicio = time.clock()

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        # LWPCookieJar is a FileCookieJar subclass with useful load/save methods
        cj = cookielib.LWPCookieJar()

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        if os.path.isfile(ficherocookies):
            cj.load(ficherocookies)
        # Install the cookie-aware opener for fetching URLs
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    theurl = url
    #txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #             'Referer':'http://www.megavideo.com/?s=signup'}

    import httplib
    parsedurl = urlparse.urlparse(url)
    print "parsedurl=", parsedurl

    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding': 'gzip,deflate',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
        'Referer': parsedurl[0] + "://" + parsedurl[1]
    }  # fake a user agent, some websites (like google) don't like automated exploration
    print txheaders

    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    fin = time.clock()
    logger.info("[scrapertools.py] Descargado 'Gzipped data' en %d segundos "
                % (fin - inicio + 1))

    # Decompress the Gzip data
    try:
        fin = inicio
        compressedstream = StringIO.StringIO(data)
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        data1 = gzipper.read()
        gzipper.close()
        fin = time.clock()
        logger.info(
            "[scrapertools.py] 'Gzipped data' descomprimido en %d segundos "
            % (fin - inicio + 1))
        return data1
    except:
        return data
def getvideo(urlpagina):
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    # The path and filename to save your cookies in
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    xbmc.output("ficherocookies %s" % ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        # LWPCookieJar is a FileCookieJar subclass with useful load/save methods
        cj = cookielib.LWPCookieJar()

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        if os.path.isfile(ficherocookies):
            cj.load(ficherocookies)
        # Install the cookie-aware opener for fetching URLs
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    url = urlpagina
    theurl = url

    txdata = None  # for a POST we could encode a dictionary of values here with urllib.urlencode(somedict)
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Referer': 'http://www.movshare.net/'
    }  # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    # Request it a second time, as if you had clicked on the banner
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    patronvideos = '<embed type="video/divx" src="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) == 0:
        patronvideos = '"file","([^"]+)"'
        matches = re.compile(patronvideos, re.DOTALL).findall(data)
    return matches[0]
def downloadpage(url, post=None,
                 headers=[['User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; es-ES; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14']]):
    logger.info("[scrapertools.py] downloadpage")
    logger.info("[scrapertools.py] url=" + url)
    if post is not None:
        logger.info("[scrapertools.py] post=" + post)
    else:
        logger.info("[scrapertools.py] post=None")

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    # Initialize the cookie library
    ficherocookies = os.path.join(config.get_setting("cookies.dir"),
                                  'cookies.lwp')
    logger.info("[scrapertools.py] Cookiefile=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        logger.info("[scrapertools.py] Importando cookielib")
        import cookielib
    except ImportError:
        logger.info("[scrapertools.py] cookielib no disponible")
        # If importing cookielib fails, let's try ClientCookie
        try:
            logger.info("[scrapertools.py] Importando ClientCookie")
            import ClientCookie
        except ImportError:
            logger.info("[scrapertools.py] ClientCookie no disponible")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            logger.info("[scrapertools.py] ClientCookie disponible")
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        logger.info("[scrapertools.py] cookielib disponible")
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        # LWPCookieJar is a FileCookieJar subclass with useful load/save methods
        cj = cookielib.LWPCookieJar()

    if cj is not None:
        logger.info("[scrapertools.py] Hay cookies")
        if os.path.isfile(ficherocookies):
            logger.info("[scrapertools.py] Leyendo fichero cookies")
            cj.load(ficherocookies)
        # Install the cookie-aware opener for fetching URLs
        if cookielib is not None:
            logger.info("[scrapertools.py] opener usando urllib2 (cookielib)")
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            logger.info("[scrapertools.py] opener usando ClientCookie")
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, launch the request
    # -------------------------------------------------
    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}
    # Add the headers
    for header in headers:
        logger.info("[scrapertools.py] header=" + header[0] + ": " + header[1])
        txheaders[header[0]] = header[1]

    # Build the request
    if post is None:
        logger.info("[scrapertools.py] petición GET")
    else:
        logger.info("[scrapertools.py] petición POST")
    req = Request(url, post, txheaders)
    handle = urlopen(req)

    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close
    data = handle.read()
    handle.close()

    '''
    # Launch the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry with special characters escaped
    except:
        req = urllib2.Request(url.replace(" ", "%20"))
        # Add the headers
        for header in headers:
            req.add_header(header[0], header[1])
        response = urllib2.urlopen(req)
    '''

    # Elapsed time
    fin = time.clock()
    logger.info("[scrapertools.py] Descargado en %d segundos " % (fin - inicio + 1))
    return data
class HTTPMyDebugProcessor(ClientCookie.BaseHandler):
    # Assumed wrapper: the original snippet begins mid-method, but the class
    # name comes from the example below and the handler hooks are standard.
    def http_request(self, request):
        if __debug__:
            for header in request.header_items():
                self.httpout.write("%s: %s\n" % header[:])
            self.httpout.write('\n')
        return request

    def http_response(self, request, response):
        if __debug__:
            code, msg, hdrs = response.code, response.msg, response.info()
            self.httpout.write("HTTP/1.x %s %s\n" % (code, msg))
            self.httpout.write(str(hdrs))
        return response

    https_request = http_request
    https_response = http_response


# Example
cjar = ClientCookie.LWPCookieJar()
opener = ClientCookie.build_opener(
    ClientCookie.HTTPCookieProcessor(cjar),
    ClientCookie.HTTPRefererProcessor(),
    HTTPMyDebugProcessor(),
)
ClientCookie.install_opener(opener)
response = ClientCookie.urlopen("http://www.google.com")
#...
def geturl(urlvideo):
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    # The path and filename to save your cookies in
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    #xbmc.output("ficherocookies %s", ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        # LWPCookieJar is a FileCookieJar subclass with useful load/save methods
        cj = cookielib.LWPCookieJar()

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        if os.path.isfile(ficherocookies):
            cj.load(ficherocookies)
        # Install the cookie-aware opener for fetching URLs
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    url = "http://video.yahoo.com/watch/%s" % urlvideo
    #url = "http://new.music.yahoo.com/videos/"
    theurl = url

    txdata = None  # for a POST we could encode a dictionary of values here with urllib.urlencode(somedict)
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Referer': 'http://video.yahoo.com/',
        'X-Forwarded-For': '12.13.14.15'
    }  # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    '''
    # Extract video height and width
    mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', data)
    if mobj is None:
        logger.info('ERROR: unable to extract video height')
        return ""
    yv_video_height = mobj.group(1)

    mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', data)
    if mobj is None:
        logger.info('ERROR: unable to extract video width')
        return ""
    yv_video_width = mobj.group(1)
    '''

    # Retrieve video playlist to extract the media URL. I'm not completely
    # sure what all these options are, but we seem to need most of them,
    # otherwise the server sends a 401.
    yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
    yv_bitrate = '700'  # according to Wikipedia this is hard-coded
    url = ('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' +
           urlvideo + '&tech=flash&mode=playlist&lg=' + yv_lg +
           '&bitrate=' + yv_bitrate + '&vidH=720' + '&vidW=1280' +
           '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
    #http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=v205690975&tech=flash&mode=playlist&lg=xRen3QvzZ_5wj1x8BbzEcR&bitrate=700&vidH=324&vidW=576&swf=as3&rd=video.yahoo.com-offsite&tk=null&adsupported=v1,v2,&eventid=1301797
    #url = 'http://video.music.yahoo.com/up/music_e/process/getPlaylistFOP.php?node_id='+ urlvideo + '&tech=flash&bitrate=20000&mode=&vidH=720&vidW=1280'

    req = Request(url, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data2 = handle.read()
    handle.close()
    print data2

    # Extract the media URL from the playlist XML
    mobj = re.search(
        r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', data2)
    if mobj is not None:
        video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
        video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
        print video_url
        return video_url
    else:
        logger.info('ERROR: Unable to extract media URL http')

    mobj = re.search(r'<STREAM (APP="[^>]+)>', data2)
    if mobj is None:
        logger.info('ERROR: Unable to extract media URL rtmp')
        return ""
    #video_url = mobj.group(1).replace("&amp;", "&")
    video_url = urllib.unquote(mobj.group(1).decode('utf-8'))
    video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)

    '''
    A sample STREAM element:

    <STREAM APP="rtmp://s1sflod020.bcst.cdn.s1s.yimg.com/StreamCache" FULLPATH="/s1snfs06r01/001/__S__/lauvpf/76414327.flv?StreamID=76414327&xdata=Njc3Mzc4MzA2NGNiNzI5MW-205754530-0&pl_auth=2598a5574b592b7c6ab262e4775b3930&ht=180&b=eca0lm561k1gn4cb7291a&s=396502118&br=700&q=ahfG2he5gqV40Laz.RUcnB&rd=video.yahoo.com-offsite&so=%2FMUSIC" CLIPID="v205690975" TYPE="STREAMING" AD="NO" APPNAME="ContentMgmt" URLPREFIX="rtmp://" SERVER="s1sflod020.bcst.cdn.s1s.yimg.com" BITRATE="7000" PORT="" PATH="/s1snfs06r01/001/__S__/lauvpf/76414327.flv" QUERYSTRING="StreamID=76414327&xdata=Njc3Mzc4MzA2NGNiNzI5MW-205754530-0&pl_auth=2598a5574b592b7c6ab262e4775b3930&ht=180&b=eca0lm561k1gn4cb7291a&s=396502118&br=700&q=ahfG2he5gqV40Laz.RUcnB&rd=video.yahoo.com-offsite&so=%2FMUSIC" URL="" TITLE="-" AUTHOR="-" COPYRIGHT="(c) Yahoo! Inc. 2006" STARTTIME="" ENDTIME=""/>
    '''
    swfUrl = 'http://d.yimg.com/ht/yep/vyc_player.swf'
    try:
        App = re.compile(r'APP="([^"]+)"').findall(video_url)[0]
        Fullpath = re.compile(r'FULLPATH="([^"]+)"').findall(video_url)[0]
        Appname = re.compile(r'APPNAME="([^"]+)"').findall(video_url)[0]
        #Server = re.compile(r'SERVER="([^"]+)"').findall(video_url)[0]
        Path = re.compile(r'PORT="" PATH="([^"]+)"').findall(
            video_url)[0].replace(".flv", "")
        #Querystring = re.compile(r'QUERYSTRING="([^"]+)"').findall(video_url)[0]
        playpath = Fullpath
        App = App.replace("/StreamCache", ":1935/StreamCache/")
        video_url = "%s%s%s playpath=%s swfurl=%s swfvfy=true" % (
            App, Appname, playpath, Path, swfUrl)
    except:
        logger.info('ERROR: re.compile failed')
        video_url = ""
    print video_url.encode("utf-8")
    return video_url
def downloadpage(url, post=None,
                 headers=[['User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12']],
                 follow_redirects=True, timeout=socket.getdefaulttimeout()):
    logger.info("[scrapertools.py] downloadpage")
    logger.info("[scrapertools.py] url=" + url)

    if post is not None:
        logger.info("[scrapertools.py] post=" + post)
    else:
        logger.info("[scrapertools.py] post=None")

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    # Initialize the cookie library
    ficherocookies = os.path.join(config.get_setting("cookies.dir"), 'cookies.lwp')
    logger.info("[scrapertools.py] ficherocookies=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        logger.info("[scrapertools.py] Importing cookielib")
        import cookielib
    except ImportError:
        logger.info("[scrapertools.py] cookielib not available")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            logger.info("[scrapertools.py] Importing ClientCookie")
            import ClientCookie
        except ImportError:
            logger.info("[scrapertools.py] ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            logger.info("[scrapertools.py] ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        logger.info("[scrapertools.py] cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        logger.info("[scrapertools.py] Cookies enabled")

        if os.path.isfile(ficherocookies):
            logger.info("[scrapertools.py] Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                logger.info("[scrapertools.py] Cookie file exists but is unreadable, deleting it")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            logger.info("[scrapertools.py] opener using urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj),
                    NoRedirectHandler())
            else:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            logger.info("[scrapertools.py] opener using ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, send the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Build the request
    if post is None:
        logger.info("[scrapertools.py] GET request")
    else:
        logger.info("[scrapertools.py] POST request")

    # Add the headers
    logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        logger.info("[scrapertools.py] header %s=%s" % (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    logger.info("[scrapertools.py] ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available from Python 2.6 onwards --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        deftimeout = socket.getdefaulttimeout()
        try:
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
        except:
            import sys
            for line in sys.exc_info():
                logger.error("%s" % line)
            socket.setdefaulttimeout(deftimeout)
            raise  # without re-raising, `handle` would be undefined below
        socket.setdefaulttimeout(deftimeout)

    # Update the cookie store
    if cj is not None:
        cj.save(ficherocookies)

    # Read the data and close
    data = handle.read()
    info = handle.info()
    logger.info("[scrapertools.py] Response")
    logger.info("[scrapertools.py] ---------------------------")
    # rfc822.Message is not directly iterable in Python 2, so use keys()
    for header in info.keys():
        logger.info("[scrapertools.py] " + header + "=" + info[header])
    handle.close()
    logger.info("[scrapertools.py] ---------------------------")

    '''
    # Send the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry replacing special characters
    except:
        req = urllib2.Request(url.replace(" ", "%20"))
        # Add the headers
        for header in headers:
            req.add_header(header[0], header[1])
        response = urllib2.urlopen(req)
    '''

    # Elapsed time
    fin = time.clock()
    logger.info("[scrapertools.py] Downloaded in %d seconds " % (fin - inicio + 1))

    return data
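# A hypothetical call site for downloadpage; the URL, Referer and timeout
# below are illustrative placeholders, not values from this module:
data = downloadpage(
    "http://www.example.com/videos.php?page=1",
    post=None,                # GET; pass an urllib.urlencode()d string for POST
    headers=[['User-Agent', 'Mozilla/5.0'],
             ['Referer', 'http://www.example.com/']],
    follow_redirects=False,   # installs NoRedirectHandler so 30x can be inspected
    timeout=10)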
def cachePagePostCookies(url, data):
    xbmc.output("[scrapertools.py] cachePagePostCookies - " + url)
    xbmc.output("[scrapertools.py] cachePagePostCookies - data=" + data)
    inicio = time.clock()

    # Initialize the cookie library
    ficherocookies = os.path.join(os.getcwd(), 'cookies.lwp')
    xbmc.output("[scrapertools.py] cachePagePostCookies - Cookiefile=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    #txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #             'Referer':'http://www.megavideo.com/?s=signup'}
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
    }
    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, data, txheaders)
    handle = urlopen(req)
    if cj is not None:
        cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    fin = time.clock()
    xbmc.output("[scrapertools.py] Downloaded in %d seconds " % (fin - inicio + 1))

    return data
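# A hypothetical call showing that the `data` argument is expected to arrive
# already urlencoded. Field names and credentials are placeholders; the URL
# echoes the one in the function's commented-out headers:
import urllib
postdata = urllib.urlencode({'username': 'someone', 'password': 'secret'})
page = cachePagePostCookies('http://www.megavideo.com/?s=signup', postdata)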
try:
    import cookielib
except ImportError:
    # If importing cookielib fails
    # let's try ClientCookie
    try:
        import ClientCookie
    except ImportError:
        # ClientCookie isn't available either
        urlopen = urllib2.urlopen
        Request = urllib2.Request
    else:
        # imported ClientCookie
        urlopen = ClientCookie.urlopen
        Request = ClientCookie.Request
        cj = ClientCookie.LWPCookieJar()
else:
    # importing cookielib worked
    urlopen = urllib2.urlopen
    Request = urllib2.Request
    cj = cookielib.LWPCookieJar()
    # This is a subclass of FileCookieJar
    # that has useful load and save methods

if cj is not None:
    # we successfully imported
    # one of the two cookie handling modules
    if os.path.isfile(COOKIEFILE):
        # if we have a cookie file already saved
        # then load the cookies into the Cookie Jar
        cj.load(COOKIEFILE)
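# The import-fallback block above recurs nearly verbatim in every function
# in this file. A minimal sketch of how it could be factored into a single
# reusable helper; the name build_cookie_tools is ours, not the module's:
def build_cookie_tools(cookiefile):
    """Hypothetical helper: return (urlopen, Request, cookiejar), preferring
    cookielib, falling back to ClientCookie, then to plain urllib2 with no
    cookie support at all."""
    import os, urllib2
    try:
        import cookielib
        cj = cookielib.LWPCookieJar()
        if os.path.isfile(cookiefile):
            cj.load(cookiefile)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)
        return urllib2.urlopen, urllib2.Request, cj
    except ImportError:
        pass
    try:
        import ClientCookie
        cj = ClientCookie.LWPCookieJar()
        if os.path.isfile(cookiefile):
            cj.load(cookiefile)
        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
        ClientCookie.install_opener(opener)
        return ClientCookie.urlopen, ClientCookie.Request, cj
    except ImportError:
        # Neither module is available: no cookie jar at all
        return urllib2.urlopen, urllib2.Request, None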
def getAuthentication(self, user, password):
    COOKIEFILE = 'cookies.lwp'
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(COOKIEFILE):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(COOKIEFILE)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    values = {'campo_login': user, 'campo_password': password}
    # We set the user and the pass

    theurl = 'http://www.basketpc.com/index.php?mod=autentificacion'
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = urllib.urlencode(values)
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)

    txheaders = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64)'}
    # fake a user agent, some websites (like google) don't like automated exploration

    try:
        req = Request(theurl, txdata, txheaders)  # create a request object
        handle = urlopen(req)  # and open it to return a handle on the url
    except IOError, e:
        print 'We failed to open "%s".' % theurl
        if hasattr(e, 'code'):
            print 'We failed with error code - %s.' % e.code
        elif hasattr(e, 'reason'):
            print "The error object has the following 'reason' attribute :"
            print e.reason
            print "This usually means the server doesn't exist,"
            print "is down, or we don't have an internet connection."
            sys.exit()
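# Within the excerpt shown, getAuthentication loads cookies.lwp but never
# writes it back, so the session cookie obtained at login would be lost
# between runs. A sketch of the missing save step, assuming it runs right
# after the urlopen above with the same `cj`, `handle` and COOKIEFILE names:
if cj is not None:
    cj.save(COOKIEFILE)   # persist the authenticated session cookies to disk
data = handle.read()      # body of the login response, if the caller needs it
handle.close()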