def link_parser(self, url, wait_func, content_range=None): """""" #Remove the filename from the url tmp = url.split("/file/")[1].split("/")[0] url = "%s/file/%s" % (BASE_URL,tmp) link = None retry = 3 try: opener = URLOpen() for line in opener.open(url): if 'check:' in line: check = line.split("check:'")[1].replace("'","").strip() elif "Recaptcha.create" in line: tmp = line.split('("')[1].split('"')[0] recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp if not wait_func(): return c = Recaptcha(BASE_URL, recaptcha_link) while not link and retry: challenge, response = c.solve_captcha() if response: if not wait_func(): return #Filefactory perfoms as check on its server by doing an #Ajax request sending the following data form = urllib.urlencode([("recaptcha_challenge_field", challenge), ("recaptcha_response_field", response), ("recaptcha_shortencode_field", "undefined"),("check", check)]) url = "%s/file/checkCaptcha.php" % BASE_URL #Getting the result back, status:{"ok"|"fail"} for line in opener.open(url, form): if 'status:"ok"' in line: tmp = line.split('path:"')[1].strip('"') tmp_link = "%s%s" %(BASE_URL,tmp) for line in opener.open(tmp_link): if '<span class="countdown">' in line: #Try to get WAIT from the page try: tmp = line.split('"countdown">')[1].split("</span")[0] tmp = int(tmp) except ValueError: pass else: if tmp > 0: WAIT = tmp if "Download with FileFactory Basic" in line: link = line.split('<a href="')[1].split('"')[0] break retry -= 1 break if link: if not wait_func(WAIT): return return opener.open(link, None, content_range, True) except Exception, e: logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a Hotfile premium download handle for `url`.

    Asks the Hotfile API for the direct download link, authenticating
    with the digest query-string from self.get_cookie(), then opens
    that link and returns the handle.  Returns None if the caller
    cancels via wait_func.
    """
    auth_string = self.get_cookie()
    if not wait_func():
        return
    encoded_link = 'http://api.hotfile.com/?action=getdirectdownloadlink&link=' + url + auth_string
    #NOTE(review): this logs the account auth digest; consider
    #redacting auth_string or demoting to debug.
    logger.info("Encoded link %s" % (encoded_link))
    opener = URLOpen()
    handler = opener.open(encoded_link)
    #The API answers with the direct link on the first line; strip the
    #trailing newline so the follow-up request gets a clean URL (fix:
    #readline() includes the line terminator).
    actual_link = handler.readline().strip()
    return opener.open(actual_link)
def parse_wait(self, url): """""" link = None form = None wait = 0 found = False try: tmp_form = [] opener = URLOpen() for line in opener.open(url): if "download_file" in line: found = True elif found: if "method=post " in line: link = "%s%s" % (BASE_URL, line.split('action="')[1].split('" ')[0]) elif "name=action " in line: tmp_form.append(("action", line.split("value=")[1].split(">")[0])) elif "name=tm " in line: tmp_form.append(("tm", line.split("value=")[1].split(">")[0])) elif "name=tmhash " in line: tmp_form.append(("tmhash", line.split("value=")[1].split(">")[0])) elif "name=wait " in line: wait = int(line.split("value=")[1].split(">")[0]) tmp_form.append(("wait", wait)) elif "name=waithash " in line: tmp_form.append(("waithash", line.split("value=")[1].split(">")[0])) elif "name=upidhash " in line: tmp_form.append(("upidhash", line.split("value=")[1].split(">")[0])) found = False form = urllib.urlencode(tmp_form) except Exception, e: logger.exception("%s: %s" % (url, e))
def get_cookie(self, user, password, url=None):
    """Log in to oron.com and return the session CookieJar.

    Returns None when credentials are missing or the login produced no
    cookies.  `url` is accepted for interface compatibility and unused.
    """
    if user is None or password is None:
        return None
    jar = cookielib.CookieJar()
    login_form = urllib.urlencode({
        "password": password,
        "login": user,
        "rand": "",
        "redirect": "",
        "op": "login"})
    URLOpen(jar).open("http://www.oron.com/login", login_form)
    if len(jar):
        return jar
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download link for `url`.

    Uses parse_wait() to scrape the wait form, honours the countdown
    through `wait_func`, and solves the reCAPTCHA (up to 3 attempts)
    when the page asks for one.  wait_func returning falsy aborts.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    link = None
    retry = 3  # captcha attempts
    try:
        #Strip any query string from the file URL.
        if "?" in url:
            url = url.split("?")[0]
        tmp_link, tmp_form, wait = self.parse_wait(url)
        if not tmp_link or not tmp_form:
            #No form found: assume the download limit was hit.
            return self.set_limit_exceeded()
        elif not wait_func(wait):
            return
        else:
            opener = URLOpen(cookielib.CookieJar())
            it = opener.open(tmp_link, tmp_form)
            for line in it:
                if "function starthtimer(){" in line:
                    #A JS hold timer means the limit is exceeded; two
                    #lines below carries the delay in milliseconds.
                    it.next()
                    try:
                        tmp = int(it.next().split("+")[1].split(";")[0])
                        return self.set_limit_exceeded(int(tmp/1000))
                    except Exception, e:
                        logger.exception("%s: %s" % (url, e))
                        return
                elif "click_download" in line:
                    #Direct link already present on the page.
                    link = line.split('href="')[1].split('"')[0]
                    break
                elif "http://api.recaptcha.net/challenge" in line:
                    recaptcha_link = line.split('src="')[1].split('"')[0]
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    while not link and retry:
                        challenge, response = c.solve_captcha()
                        if response:
                            if not wait_func():
                                return
                            form = urllib.urlencode([
                                    ("action", "checkcaptcha"),
                                    ("recaptcha_challenge_field", challenge),
                                    ("recaptcha_response_field", response)])
                            for line in opener.open(tmp_link, form):
                                if "click_download" in line:
                                    link = line.split('href="')[1].split('"')[0]
                                    break
                        retry -= 1
                    break
    #NOTE(review): the definition ends here inside `try` with no except
    #clause and never opens/returns `link` -- the tail of this function
    #appears truncated; recover it from version control.
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a DepositFiles free-download link for `url`.

    Normalizes the URL, POSTs gateway_result=1, scrapes the countdown
    and the download-container URL, waits, then retries up to 10 times
    around the known "Invalid params" server bug.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        wait = WAIT
        link = None
        opener = URLOpen()
        #Transform the url into an english one
        url = "%s%s" % (BASE_URL, url.split("/files/")[1].split("/")[0])
        form = urllib.urlencode([('gateway_result','1')])
        for line in opener.open(url,form):
            #Try to get WAIT from the page
            if 'download_waiter_remain' in line:
                try:
                    tmp = line.split(">")[2].split("<")[0]
                    tmp = int(tmp)
                except Exception, e:
                    pass
                else:
                    if tmp > 0:
                        wait = tmp
            elif "$('#download_container').load('" in line:
                #The real download page is loaded via this AJAX call.
                try:
                    tmp = line.split("load('")[1].split("'")[0]
                    url = "%s%s" % ("http://depositfiles.com", tmp)
                except Exception, e:
                    pass
        if not wait_func(wait + 1):
            return
        #Due to a bug in DepositFiles, sometimes it returns "Invalid params"
        #If it's the case, retry, 10 times and set limit exceeded
        for attempt in range(10):
            for line in opener.open(url):
                if "Invalid" in line:
                    if not wait_func():
                        return
                    break
                elif "action" in line:
                    link = line.split('"')[1].split('"')[0]
                    break
            if link:
                break
    #NOTE(review): the body ends here inside `try` with no except
    #clause and never uses `link` -- this definition appears truncated;
    #recover the missing tail from version control.
def get_cookie(self, user, password, url=None):
    """Log in to fileserve.com and return the session CookieJar.

    Returns None when credentials are missing or login yielded no
    cookies.  `url` is accepted for interface compatibility and unused.
    """
    if user is None or password is None:
        return None
    jar = cookielib.CookieJar()
    login_form = urllib.urlencode({
        "loginUserName":user,
        "loginUserPassword":password,
        "autoLogin":"******",
        "recaptcha_response_field":"",
        "recaptcha_challenge_field":"",
        "recaptcha_shortencode_field":"",
        "loginFormSubmit":"Login"})
    URLOpen(jar).open("http://www.fileserve.com/login.php", login_form)
    if len(jar):
        return jar
def get_cookie(self, user, password, url=None):
    """Fetch a RapidShare premium session cookie via the account API.

    Queries getaccountdetails_v1 with the credentials; on success wraps
    the returned 'enc' value into a CookieJar and returns it.  Returns
    None when the API reports ERROR or no cookie line is present.
    """
    request = urllib.urlencode([("sub", "getaccountdetails_v1"),
                                ("type", "prem"),
                                ("login", user),
                                ("password", password),
                                ("withcookie", 1)])
    for reply_line in URLOpen().open(API_URL, request).readlines():
        if "ERROR" in reply_line:
            return
        elif "cookie" in reply_line:
            enc_value = reply_line.split("=")[1].strip()
            session_cookie = cookielib.Cookie(version=0, name='enc',
                    value=enc_value,
                    port=None, port_specified=False,
                    domain='.rapidshare.com', domain_specified=False,
                    domain_initial_dot=True,
                    path='/', path_specified=True,
                    secure=False, expires=None, discard=True,
                    comment=None, comment_url=None,
                    rest={'HttpOnly': None}, rfc2109=False)
            jar = cookielib.CookieJar()
            jar.set_cookie(session_cookie)
            return jar
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a FileServe free-download handle for `url`.

    Checks the time limit, solves the reCAPTCHA (up to 3 attempts),
    performs the wait/show/download POST sequence and returns the open
    handle.  `wait_func` is polled between steps so the caller can
    cancel.  Exceptions are logged and swallowed (returns None).
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        #Remove the filename from the url
        tmp = url.split("/file/")[1].split("/")[0]
        url = "%s/file/%s" % (BASE_URL, tmp)
        file_id = url.split("/")[-1].strip("/")
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        form = urllib.urlencode([("checkDownload", "check")])
        #If the limit is exceeded
        if '"fail":"timeLimit"' in opener.open(url, form).read():
            return self.set_limit_exceeded()
        it = opener.open(url)
        for line in it:
            if 'reCAPTCHA_publickey=' in line:
                tmp = line.split("'")[1].split("'")[0]
                recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                if not wait_func():
                    return
                c = Recaptcha(BASE_URL, recaptcha_link)
                for retry in range(3):
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([
                            ("recaptcha_challenge_field", challenge),
                            ("recaptcha_response_field", response),
                            ("recaptcha_shortencode_field", file_id)
                        ])
                        recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL
                        #Captcha is good
                        if "success" in opener.open(recaptcha_url, form).read():
                            #The wait reply ends with the countdown value.
                            form = urllib.urlencode([("downloadLink", "wait")])
                            wait = int(opener.open(url, form).read()[-2:])
                            if not wait_func(wait):
                                return
                            form = urllib.urlencode([("downloadLink", "show")])
                            opener.open(url, form).read()
                            form = urllib.urlencode([("download", "normal")])
                            return opener.open(url, form)  #,content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a RapidShare premium download handle for `url`.

    Opens the url with the account cookie; if the reply is HTML (no
    direct file), falls back to the download_v1 API to obtain the
    mirror host and opens that.  Exceptions are logged and swallowed.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        cookie = self.get_cookie()
        if not wait_func():
            return
        opener = URLOpen(cookie)
        handler = opener.open(url, None, content_range)
        if not wait_func():
            return
        if "text/html" in handler.info()["Content-Type"]:
            #HTML means no direct download: ask the API instead,
            #re-sending the session 'enc' cookie value explicitly.
            cookie_value = cookie._cookies[".rapidshare.com"]["/"]["enc"].value
            tmp = url.split("/")
            form = urllib.urlencode([("sub", "download_v1"),
                                     ("cookie", cookie_value),
                                     ("fileid", tmp[4]),
                                     ("filename", tmp[5])])
            for line in opener.open("http://api.rapidshare.com%s" % API_URL, form, content_range):
                if "DL:" in line:
                    #"DL:host,..." names the mirror serving the file.
                    tmp_url = "http://%s%s" % (line.split("DL:")[1].split(",")[0], API_URL)
                    return opener.open(tmp_url, form, content_range)
        else:
            return handler
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None): """""" try: tmp_link = None link = None wait = WAIT opener = URLOpen(cookielib.CookieJar()) it = opener.open(url) for line in it: if "dbtn" in line: tmp_link = line.split('href="')[1].split('"')[0] if tmp_link: it = opener.open(tmp_link) for line in it: if "id='divDLStart'" in line: link = it.next().split("<a href='")[1].split("'")[0] elif '<div class="sec">' in line: wait = int(line.split(">")[1].split("<")[0]) if not link: return elif not wait_func(wait): return except Exception, e: logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None): """""" try: link = None opener = URLOpen() form = urllib.urlencode([('download',' REGULAR DOWNLOAD ')]) for line in opener.open(url,form): if '<span id="spn_download_link">' in line: link = line.split('href="')[1].split('"')[0] if not link: return if not wait_func(): return except Exception, e: logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None): """""" found = False try: cookie = self.get_cookie() if not wait_func(): return opener = URLOpen(cookie) handler = opener.open(url, None, content_range) if not wait_func(): return else: return handler except Exception, e: logger.exception("%s: %s" % (url, e))
class PremiumCookie:
    """Builds the Hotfile API authentication query-string for premium accounts."""

    def __init__(self):
        """Create the opener used to fetch the one-time MD5 digest."""
        self.digestURL = URLOpen()

    def get_cookie(self, user, password, url=None):
        """Return the auth fragment '&username=...&passwordmd5dig=...&digest=...'.

        Fetches a one-time digest from the Hotfile API and double-hashes
        the password with it.  Returns None when credentials are
        missing.  `url` is accepted for interface compatibility.

        FIX: the username had been scrubbed out of the return expression
        ('&username='******'... is not valid Python); re-insert `user`.
        """
        if user == None or password == None:
            return None
        DigestURLHandler = self.digestURL.open('http://api.hotfile.com/?action=getdigest')
        # retrieve MD5 digest
        md5Digest = DigestURLHandler.readline()
        #TODO(review): md5Digest may carry a trailing newline from
        #readline(); confirm whether the API expects it stripped.
        md5pw = hashlib.md5(password).hexdigest()
        md5pw = hashlib.md5(md5pw + md5Digest).hexdigest()
        return '&username=' + user + '&passwordmd5dig=' + md5pw + '&digest=' + md5Digest
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a FileServe free-download handle for `url`.

    Checks the time limit, solves the reCAPTCHA (up to 3 attempts),
    performs the wait/show/download POST sequence and returns the open
    handle.  `wait_func` is polled between steps so the caller can
    cancel.  Exceptions are logged and swallowed (returns None).
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        #Remove the filename from the url
        tmp = url.split("/file/")[1].split("/")[0]
        url = "%s/file/%s" % (BASE_URL,tmp)
        file_id = url.split("/")[-1].strip("/")
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        form = urllib.urlencode([("checkDownload", "check")])
        #If the limit is exceeded
        if '"fail":"timeLimit"' in opener.open(url,form).read():
            return self.set_limit_exceeded()
        it = opener.open(url)
        for line in it:
            if 'reCAPTCHA_publickey=' in line:
                tmp = line.split("'")[1].split("'")[0]
                recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                if not wait_func():
                    return
                c = Recaptcha(BASE_URL, recaptcha_link)
                for retry in range(3):
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([
                                ("recaptcha_challenge_field", challenge),
                                ("recaptcha_response_field", response),
                                ("recaptcha_shortencode_field",file_id)])
                        recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL
                        #Captcha is good
                        if "success" in opener.open(recaptcha_url,form).read():
                            #The wait reply ends with the countdown value.
                            form = urllib.urlencode([("downloadLink", "wait")])
                            wait = int(opener.open(url,form).read()[-2:])
                            if not wait_func(wait):
                                return
                            form = urllib.urlencode([("downloadLink", "show")])
                            opener.open(url,form).read()
                            form = urllib.urlencode([("download", "normal")])
                            return opener.open(url,form)#,content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
class PremiumCookie:
    """Builds the Hotfile API authentication query-string for premium accounts."""

    def __init__(self):
        """Create the opener used to fetch the one-time MD5 digest."""
        self.digestURL = URLOpen()

    def get_cookie(self, user, password, url=None):
        """Return the auth fragment '&username=...&passwordmd5dig=...&digest=...'.

        Fetches a one-time digest from the Hotfile API and double-hashes
        the password with it.  Returns None when credentials are
        missing.  `url` is accepted for interface compatibility.

        FIX: the username had been scrubbed out of the return expression
        ('&username='******'... is not valid Python); re-insert `user`.
        """
        if user == None or password == None:
            return None
        DigestURLHandler = self.digestURL.open(
            'http://api.hotfile.com/?action=getdigest')
        # retrieve MD5 digest
        md5Digest = DigestURLHandler.readline()
        #TODO(review): md5Digest may carry a trailing newline from
        #readline(); confirm whether the API expects it stripped.
        md5pw = hashlib.md5(password).hexdigest()
        md5pw = hashlib.md5(md5pw + md5Digest).hexdigest()
        return '&username=' + user + '&passwordmd5dig=' + md5pw + '&digest=' + md5Digest
def link_parser(self, url, wait_func, content_range=None): """""" link = None wait = 0 try: tmp = url.split("/") opener = URLOpen() url = "%s&fileid=%s" % (API_URL,tmp[4]) url = "%s&filename=%s" % (url,tmp[5]) for line in opener.open("http://%s%s" % ("api.rapidshare.com",url)): print line if "DL:" in line: tmp = line.split("DL:")[1].split(",") link = "http://%s%s&dlauth=%s" % (tmp[0],url,tmp[1]) wait = int(tmp[2]) print link if not wait_func(wait): return if link: return URLOpen().open(link, content_range) else: return self.set_limit_exceeded() except Exception, e: logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download link for a media url.

    Rewrites /video/, /audio/ or /image/ urls to /download/, POSTs the
    download form, decodes the obfuscated `link_enc` array into the
    real link and scrapes the countdown.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        link = None
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        if "/video/" in url:
            url = url.replace("/video/", "/download/")
        elif "/audio/" in url:
            url = url.replace("/audio/", "/download/")
        elif "/image/" in url:
            url = url.replace("/image/", "/download/")
        try:
            form = urllib.urlencode([("download", 1)])
            for line in opener.open(url,form):
                if 'link_enc=new Array' in line:
                    #The link is split into quoted single characters.
                    tmp = line.strip().split("var link_enc=new Array(")[1].split(");")[0]
                    link = tmp.replace("','","").replace("'","")
                #Try to get WAIT from the page
                if 'document|important' in line:
                    try:
                        tmp = line.split("here|")[1].split("|class")[0]
                        tmp = int(tmp)
                    except ValueError:
                        pass
                    else:
                        if tmp > 0:
                            #NOTE(review): assigning WAIT makes it local
                            #to this function in Python 2, so the later
                            #wait_func(WAIT) raises UnboundLocalError
                            #whenever this branch never runs -- latent bug.
                            WAIT = tmp
                            break
        except Exception, e:
            logger.exception("%s :%s" % (url, e))
        if not link:
            return
        if not wait_func(WAIT):
            return
    #NOTE(review): the body ends here inside the outer `try` with no
    #except clause and never opens/returns `link` -- this definition
    #appears truncated; recover the missing tail from version control.
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a Badongo free-download handle for `url`.

    Flow: fetch the OCR captcha, solve it locally with Tesseract (up to
    4 tries, with a digit-to-letter substitution heuristic), run the
    download:init / download:check AJAX handshake (honouring every
    'check_n' wait the server sends), then assemble the final two-part
    link and return the open handle.  Exceptions are logged and
    swallowed (returns None).
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        link = []  #One link at the end is in two parts
        captcha_url = None
        wait = WAIT
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        if not wait_func():
            return
        #Get the captcha url
        data = urllib.urlencode([("rs", "refreshImage"), ("rst", ""),
                                 ("rsrnd", int(time.time()))])
        tmp = opener.open(url, data).read().split("+:var res = '")[1].split("'; res;")[0].replace('\\"', '"')
        form_action = tmp.split('action="')[1].split('"')[0]
        cap_id = tmp.split('name=cap_id value=')[1].split('>')[0]
        cap_secret = tmp.split('name=cap_secret value=')[1].split('>')[0]
        captcha_url = "%s%s" % (BASE_URL, tmp.split('img src="')[1].split('"')[0])
        if captcha_url:
            solved = False
            cont = 0
            while (not solved) and cont < 4:
                tes = Tesseract(opener.open(captcha_url).read(), self.filter_image)
                captcha = tes.get_captcha()
                #Crack trick to optimize the OCR
                if len(captcha) == 4 and captcha.isalnum():
                    if not captcha.isalpha():
                        #Map commonly-confused digits back to letters.
                        for i, j in [("0", "O"),("1", "I"),("2", "Z"),("3", "B"),("4", "A"),("5", "S"),("6", "G"),("7", "T"),("8", "B"),("9", "B")]:
                            captcha = captcha.replace(i,j)
                    captcha = captcha.upper()
                #Captcha : 4 letters
                if len(captcha) == 4 and captcha.isalpha():
                    if not wait_func():
                        return
                    logger.info("Captcha: %s" % captcha)
                    data = urllib.urlencode([("user_code", captcha),
                                             ("cap_id",cap_id),
                                             ("cap_secret",cap_secret)])
                    it = opener.open(form_action, data)
                    z = None
                    h = None
                    for line in it:
                        if "'z':'I!" in line:
                            z = line.split("'z':'")[1].split("'")[0]
                            h = line.split("'h':'")[1].split("'")[0]
                        elif 'window.location.href = dlUrl' in line:
                            it.next()
                            link.append(it.next().split('"')[1].split('"')[0])
                            solved = True
                            #If there is this line, the captcha is good
                            break
                cont += 1
            #If the captcha is good
            if solved and z and h:
                logger.info("Good captcha")
                if not wait_func():
                    return
                data = urllib.urlencode([("id",form_action.split("/")[-1]), ("type","file"), ("ext",""),("f","download:init"),("z","zvar"),("h","hvar")])
                #zvar/hvar placeholders dodge urlencode escaping of z/h.
                data = data.replace("zvar",z).replace("hvar",h)
                #The referer needs to be specify
                res = opener.open("%s%s" % (BASE_URL,JS_URL), data,None,True,form_action)
                t = None
                wait = None
                z = None
                h = None
                for line in res:
                    if "'z'" in line:
                        z = line.split("'z': '")[1].split("'")[0]
                    elif "'h'" in line:
                        h = line.split("'h': '")[1].split("'")[0]
                    elif "'t'" in line:
                        t = line.split("'t': '")[1].split("'")[0]
                    elif "check_n" in line:
                        wait = int(line.split('[\'check_n\'] = "')[1].split('"')[0])
                if not wait:
                    wait = WAIT
                if not wait_func(wait):
                    return
                data = urllib.urlencode([("id",form_action.split("/")[-1]), ("type","file"), ("ext",""),("f","download:check"),("z","zvar"),("h","hvar"),("t",t)])
                data = data.replace("zvar",z).replace("hvar",h)
                res = opener.open("%s%s" % (BASE_URL,JS_URL), data,None,True,form_action)
                t = None
                z = None
                h = None
                #Sometimes it sends another check_n
                while True:
                    if not wait_func():
                        return
                    res = opener.open("%s%s" % (BASE_URL,JS_URL), data,None,True,form_action)
                    wait = None
                    for line in res:
                        if "check_n" in line:
                            wait = int(line.split("=")[1].split(";")[0])
                            break
                        elif "'z'" in line:
                            z = line.split("'z': '")[1].split("'")[0]
                        elif "'h'" in line:
                            h = line.split("'h': '")[1].split("'")[0]
                        elif "'t'" in line:
                            t = line.split("'t': '")[1].split("'")[0]
                    if not wait:
                        break
                    else:
                        if not wait_func(wait):
                            return
                if not wait_func():
                    return
                data = urllib.urlencode([("rs","getFileLink"),("rst",""),("rsrnd",int(time.time())),("rsargs[]","0"),("rsargs[]","yellow"),("rsargs[]","zvar"),("rsargs[]","hvar"),("rsargs[]",t),("rsargs[]","file"),("rsargs[]",form_action.split("/")[-1]),("rsargs[]","")])
                data = data.replace("zvar",z).replace("hvar",h)
                #This cookie needs to be added manually
                gflcur = cookielib.Cookie(version=0, name='_gflCur', value='0', port=None, port_specified=False, domain='www.badongo.com', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)
                cookie.set_cookie(gflcur)
                res = opener.open(form_action, data,None,True,form_action).readlines()
                tmp = res[0].split('onclick')[2].split('(')[1].split("')")[0].replace('\\','').strip("'")
                link.append(tmp)
                if not wait_func():
                    return
                #Second part (host) + first part (path) of the link.
                url = "%s%s?zenc=" %(link[1],link[0])
                res = opener.open(url, data,None,True,form_action)
                for line in res:
                    if "window.location.href = '" in line:
                        final_url = line.split("window.location.href = '")[1].split("'")[0]
                        break
                return opener.open("%s%s" % (BASE_URL,final_url), data,content_range,True,url)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a Badongo free-download handle for `url`.

    Fetches the OCR captcha, solves it with Tesseract (up to 4 tries
    with a digit-to-letter correction heuristic), performs the
    download:init / download:check AJAX handshake (honouring each
    'check_n' wait), then builds the final two-part link and returns
    the open handle.  Exceptions are logged and swallowed.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        link = []  #One link at the end is in two parts
        captcha_url = None
        wait = WAIT
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        if not wait_func():
            return
        #Get the captcha url
        data = urllib.urlencode([("rs", "refreshImage"), ("rst", ""),
                                 ("rsrnd", int(time.time()))])
        tmp = opener.open(url, data).read().split(
            "+:var res = '")[1].split("'; res;")[0].replace('\\"', '"')
        form_action = tmp.split('action="')[1].split('"')[0]
        cap_id = tmp.split('name=cap_id value=')[1].split('>')[0]
        cap_secret = tmp.split('name=cap_secret value=')[1].split('>')[0]
        captcha_url = "%s%s" % (BASE_URL,
                                tmp.split('img src="')[1].split('"')[0])
        if captcha_url:
            solved = False
            cont = 0
            while (not solved) and cont < 4:
                tes = Tesseract(
                    opener.open(captcha_url).read(), self.filter_image)
                captcha = tes.get_captcha()
                #Crack trick to optimize the OCR
                if len(captcha) == 4 and captcha.isalnum():
                    if not captcha.isalpha():
                        #Map commonly-confused digits back to letters.
                        for i, j in [("0", "O"), ("1", "I"), ("2", "Z"),
                                     ("3", "B"), ("4", "A"), ("5", "S"),
                                     ("6", "G"), ("7", "T"), ("8", "B"),
                                     ("9", "B")]:
                            captcha = captcha.replace(i, j)
                    captcha = captcha.upper()
                #Captcha : 4 letters
                if len(captcha) == 4 and captcha.isalpha():
                    if not wait_func():
                        return
                    logger.info("Captcha: %s" % captcha)
                    data = urllib.urlencode([("user_code", captcha),
                                             ("cap_id", cap_id),
                                             ("cap_secret", cap_secret)])
                    it = opener.open(form_action, data)
                    z = None
                    h = None
                    for line in it:
                        if "'z':'I!" in line:
                            z = line.split("'z':'")[1].split("'")[0]
                            h = line.split("'h':'")[1].split("'")[0]
                        elif 'window.location.href = dlUrl' in line:
                            it.next()
                            link.append(
                                it.next().split('"')[1].split('"')[0])
                            solved = True
                            #If there is this line, the captcha is good
                            break
                cont += 1
            #If the captcha is good
            if solved and z and h:
                logger.info("Good captcha")
                if not wait_func():
                    return
                data = urllib.urlencode([
                    ("id", form_action.split("/")[-1]),
                    ("type", "file"),
                    ("ext", ""),
                    ("f", "download:init"),
                    ("z", "zvar"),
                    ("h", "hvar")
                ])
                #zvar/hvar placeholders dodge urlencode escaping of z/h.
                data = data.replace("zvar", z).replace("hvar", h)
                #The referer needs to be specify
                res = opener.open("%s%s" % (BASE_URL, JS_URL), data,
                                  None, True, form_action)
                t = None
                wait = None
                z = None
                h = None
                for line in res:
                    if "'z'" in line:
                        z = line.split("'z': '")[1].split("'")[0]
                    elif "'h'" in line:
                        h = line.split("'h': '")[1].split("'")[0]
                    elif "'t'" in line:
                        t = line.split("'t': '")[1].split("'")[0]
                    elif "check_n" in line:
                        wait = int(
                            line.split('[\'check_n\'] = "')
                            [1].split('"')[0])
                if not wait:
                    wait = WAIT
                if not wait_func(wait):
                    return
                data = urllib.urlencode([
                    ("id", form_action.split("/")[-1]),
                    ("type", "file"),
                    ("ext", ""),
                    ("f", "download:check"),
                    ("z", "zvar"),
                    ("h", "hvar"),
                    ("t", t)
                ])
                data = data.replace("zvar", z).replace("hvar", h)
                res = opener.open("%s%s" % (BASE_URL, JS_URL), data,
                                  None, True, form_action)
                t = None
                z = None
                h = None
                #Sometimes it sends another check_n
                while True:
                    if not wait_func():
                        return
                    res = opener.open("%s%s" % (BASE_URL, JS_URL), data,
                                      None, True, form_action)
                    wait = None
                    for line in res:
                        if "check_n" in line:
                            wait = int(
                                line.split("=")[1].split(";")[0])
                            break
                        elif "'z'" in line:
                            z = line.split("'z': '")[1].split(
                                "'")[0]
                        elif "'h'" in line:
                            h = line.split("'h': '")[1].split(
                                "'")[0]
                        elif "'t'" in line:
                            t = line.split("'t': '")[1].split(
                                "'")[0]
                    if not wait:
                        break
                    else:
                        if not wait_func(wait):
                            return
                if not wait_func():
                    return
                data = urllib.urlencode([
                    ("rs", "getFileLink"),
                    ("rst", ""),
                    ("rsrnd", int(time.time())),
                    ("rsargs[]", "0"),
                    ("rsargs[]", "yellow"),
                    ("rsargs[]", "zvar"),
                    ("rsargs[]", "hvar"),
                    ("rsargs[]", t),
                    ("rsargs[]", "file"),
                    ("rsargs[]", form_action.split("/")[-1]),
                    ("rsargs[]", "")
                ])
                data = data.replace("zvar", z).replace("hvar", h)
                #This cookie needs to be added manually
                gflcur = cookielib.Cookie(version=0, name='_gflCur',
                        value='0', port=None, port_specified=False,
                        domain='www.badongo.com', domain_specified=False,
                        domain_initial_dot=False, path='/',
                        path_specified=True, secure=False, expires=None,
                        discard=True, comment=None, comment_url=None,
                        rest={'HttpOnly': None}, rfc2109=False)
                cookie.set_cookie(gflcur)
                res = opener.open(form_action, data, None, True,
                                  form_action).readlines()
                tmp = res[0].split('onclick')[2].split(
                    '(')[1].split("')")[0].replace('\\', '').strip("'")
                link.append(tmp)
                if not wait_func():
                    return
                #Second part (host) + first part (path) of the link.
                url = "%s%s?zenc=" % (link[1], link[0])
                res = opener.open(url, data, None, True, form_action)
                for line in res:
                    if "window.location.href = '" in line:
                        final_url = line.split(
                            "window.location.href = '")[1].split(
                            "'")[0]
                        break
                return opener.open("%s%s" % (BASE_URL, final_url), data,
                                   content_range, True, url)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """
    Oron links usually look like this:
    http://www.oron.com/file_id/file_name.foo.html
    However, by testing it seems that the server pulls the file name out
    by using the file_id, which is some sort of hash.  So the same file
    can aswell be accessed by:
    http://www.oron.com/file_id/file_name.foo.html.html and
    http://www.oron.com/file_id/file_name.foo.html.html(.html)*
    So we use check_links to get the file name form the HTML page, its
    slower, but more accurate as we cannot rely on the url passed here.

    Flow: POST the "Regular Download" form, pick up the hidden `rand`
    token, honour the countdown (or parse a longer hour/minute/second
    limit message), solve the reCAPTCHA and return the final handle.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    file_id = url.split("/")[3]
    file_name = self.check_links(url)[0]
    encoded_str = urllib.urlencode({
        "op" : "download1",
        "usr_login" : "",
        "id" : file_id,
        "name" : file_name,
        "referer" : "",
        "method_free" : "+Regular+Download+"})
    opener = URLOpen()
    """ The url we are currently trying to open is the origin
    (referring) URL preceding the post """
    web_page = opener.open(url, encoded_str, False, url)
    for retry in range(3):
        if not wait_func():
            return
        for line in web_page:
            if '<input type="hidden" name="rand" value="' in line:
                rand_value = line.split('value="')[1].split('"')[0]
                break
        #NOTE(review): rand_value is unbound (NameError) if the pattern
        #never matched; it should be initialised to None before the loop.
        if not rand_value:
            logger.warning("Oron Plugin: No random value in download page- template changed?");
            #NOTE(review): `wait` is undefined here -- this call raises
            #NameError instead of reporting the limit.
            return self.set_limit_exceeded(wait)
        for line in web_page:
            if '<span id="countdown">' in line:
                wait_length = line.split('<span id="countdown">')[1].split('<')[0]
                if not wait_func(int(wait_length)):
                    return
            """ Check for longer limits """
            if '<p class="err"' in line:
                #Parse "X hour(s), Y minute(s), Z second(s)" prose into
                #a total number of seconds.
                parse_line = line.split('>')[1].split('<')[0]
                seconds = 0
                minutes = 0
                hours = 0
                prev_word = ''
                for word in parse_line.split(' '):
                    if word == 'hour,' or word == 'hours,':
                        hours = int(prev_word)
                    elif word == 'minute,' or word == 'minutes,':
                        minutes = int(prev_word)
                    elif word == 'second' or word == 'seconds':
                        seconds = int(prev_word)
                        break
                    else:
                        prev_word = word
                seconds = seconds + (minutes * 60) + (hours * 3600)
                return self.set_limit_exceeded(seconds)
            if 'http://api.recaptcha.net/challenge?' in line:
                recaptcha_link = line.split('src="')[1].split('"')[0]
                if not wait_func():
                    return
                c = Recaptcha(BASE_URL, recaptcha_link)
                challenge, response = c.solve_captcha()
                if response:
                    if not wait_func():
                        return
                    #Submit the input to the recaptcha system
                    form = urllib.urlencode({
                        "op" : "download2",
                        "id" : file_id,
                        "rand" : rand_value,
                        "referer" : url,
                        "method_free" : "+Regular+Download+",
                        "method_premium" : "",
                        "recaptcha_challenge_field" : challenge,
                        "recaptcha_response_field" : response,
                        "down_direct" : 1 })
                    download_page = opener.open(url, form, None, False, url)
                    #Get the link and return it
                    for line in download_page:
                        if 'Download File' in line:
                            return opener.open(line.split('href="')[1].split('"')[0])
    return
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download handle for `url`.

    Handles an optional first wait (`var wf`), reloads the page up to 3
    times around the server-imposed countdowns, and solves the
    reCAPTCHA; a non-HTML response to the captcha POST is the file
    itself and is returned.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        wait = WAIT
        opener = URLOpen()
        it = opener.open(url)
        first_wait = False
        #Check for first wait
        for line in it:
            if 'var wf =' in line:
                try:
                    wait = int(line.split("=")[1].split(";")[0].strip())
                    first_wait = True
                except Exception, e:
                    logger.exception("%s: %s" % (url, e))
                    return
                break
        #Necessary to loop to reload the page, due to the wait
        for loop in range(3):
            if not wait_func():
                return
            #First wait
            if first_wait:
                if not wait_func(wait):
                    return
                data = urllib.urlencode([("free", "Regular Download")])
                url = "%sbilling?%s" % (url, data)
                it = opener.open(url, data)
            #No first wait
            else:
                it = opener.open(url)
            for line in it:
                if 'name="id"' in line:
                    file_id = line.split('value="')[1].split('"')[0]
                elif 'id="dwait"' in line:
                    #Skip two lines; the third tells form vs. countdown.
                    it.next()
                    it.next()
                    tmp = it.next()
                    #The download is possible
                    if "form" in tmp:
                        form_action = tmp.split('action="')[1].split(
                            '"')[0]
                    #Necessary to wait
                    else:
                        it.next()
                        it.next()
                        wait = int(it.next().split("'")[1].split("'")[0])
                        if wait < 60:
                            if not wait_func(wait):
                                return
                            #Next loop, reload the page
                            break
                        else:
                            return self.set_limit_exceeded(wait)
                elif 'Recaptcha.create("' in line:
                    tmp = line.split('"')[1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([
                            ("recaptcha_challenge_field", challenge),
                            ("recaptcha_response_field", response),
                            ("recaptcha_shortencode_field", "undefined")
                        ])
                        handle = opener.open(form_action, form, content_range)
                        if not handle.info().getheader(
                                "Content-Type") == "text/html":
                            #Captcha is good
                            return handle
    #NOTE(review): the outer `try` has no except clause -- this
    #definition appears truncated; recover the missing tail from
    #version control.
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download handle for `url`.

    Follows the ?start=1 redirect to find the mirror TLD, forces an
    XMLHttpRequest POST to the download endpoint, then loops up to 3
    times over countdowns/tm-token forms until a captcha or a direct
    'downloadLink' appears.  Exceptions are logged and swallowed.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        file_id = url.split("/")[-2]
        form_action = "%s?start=1" % (url)
        if not wait_func():
            return
        it = opener.open(form_action)
        form_action = "%s?start=1" % it.geturl()  #Get the redirect url
        end = form_action.split(".")[2].split("/")[0]  #Get the .com replacement
        form_action2 = "%s/%s/%s?start=1" % (BASE_URL,file_id,file_id)
        form_action2 = form_action2.replace(".com",".%s" % end)
        form = urllib.urlencode([("foo","foo")])  #Force urllib2 to do a POST
        #FIXME : urlopen should be able to set custom headers
        headers = {"User-Agent": cons.USER_AGENT,
                   "X-Requested-With": "XMLHttpRequest"}
        it = opener.opener.open(urllib2.Request(form_action2, None, headers), form)
        it_tmp = None
        #Loop until we get the captcha
        for loop in range(3):
            if not wait_func():
                return
            #it_tmp is set after a wait
            if it_tmp:
                it = it_tmp
            for line in it:
                if 'Recaptcha.create("' in line:
                    tmp = line.split('"')[1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    for retry in range(3):
                        challenge, response = c.solve_captcha()
                        if response:
                            if not wait_func():
                                return
                            #Submit the input to the recaptcha system
                            form = urllib.urlencode([
                                ("recaptcha_challenge_field", challenge),
                                ("recaptcha_response_field", response)])
                            it = opener.open(form_action, form)
                            #Get the link
                            for line in it:
                                if 'downloadLink' in line:
                                    it.next()
                                    return opener.open(it.next().split('href="')[1].split('"')[0])
                #Link already there
                #NOTE(review): duplicate of the condition handled inside
                #the captcha branch above; verify both paths are needed.
                elif 'downloadLink' in line:
                    it.next()
                    return opener.open(it.next().split('href="')[1].split('"')[0])
                #Need to wait
                elif "name='tm'" in line:
                    tm = line.split("value='")[1].split("'")[0];
                    tm_hash = it.next().split("value='")[1].split("'")[0];
                    form = urllib.urlencode([("tm", tm), ("tm_hash", tm_hash)])
                #Need to wait
                elif "countDownDelay =" in line:
                    wait = int(line.split("=")[1].split(";")[0])
                    if wait < 60:
                        if not wait_func(wait):
                            return
                        it_tmp = opener.open(form_action, form)  #fetch the page
                        #Next loop, reload the page
                        break
                    else:
                        return self.set_limit_exceeded(wait)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a MediaFire free-download handle for `url`.

    Decrypts the obfuscated javascript on the file page (via the
    plugin's split_eval/decrypt helpers) to find the div id holding the
    real link among decoys, queries dynamic/download.php, decodes the
    final link table and opens the resulting download URL.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        pkr = None
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        res = ""
        #Open the first page
        page = opener.open(url).readlines()
        for line in page:
            #Get pKr
            if "pKr='" in line:
                pkr = line.split("'")[1].split("'")[0]
            #Get the last block to unescape
            if "unescape" in line:
                tmp = line.split("break;}")[-1]
                tmp = tmp.split("var cb")[0]
                tmp = self.split_eval(tmp)
                #Eval the block until it's plain text
                res = self.decrypt(tmp)
        id_func = res.split("(")[0]  #Name of the function containig the id refering to the div that contains the real link
        pk1 = res.split("'")[3].split("'")[0]
        qk = res.split("'")[1].split("'")[0]  #Public ID of the file
        it = iter(page)
        for line in it:
            #Line containing the function to parse
            if id_func in line:
                #Try to get the crypted block
                tmp = line.split(id_func)[1].split("setTimeout")[0].split('"none";')[1]
                tmp = self.split_eval(tmp)
                #Eval until it's plain text
                res = self.decrypt(tmp)
                div_id = res.split('getElementById("')[1].split('"')[0]
                data = urllib.urlencode([("qk",qk), ("pk1", pk1), ("r", pkr),])
                form_action = "http://www.mediafire.com/dynamic/download.php?%s" %data
                #Parse the GET
                res = opener.open(form_action, data)
                line = " ".join(res)
                #Long line containing the js
                if "var" in line:
                    #Decrypt the table containig the final dl var
                    tmp = line.split("function dz()")[0].split(";")[2:-1]
                    tmp = ";".join(tmp)
                    tmp = self.split_eval(tmp)
                    table = self.decrypt(tmp)
                #Result is plain text
                if "http://download" in line:
                    #Get all the dl links (even the fake ones)
                    var = line.split('mediafire.com/" +')
                    #Get the number of the server
                    serv = line.split("http://download")[1].split(".")[0]
                    #Get the name of the file
                    name = var[1].split('+')[1].split("/")[2].split('"')[0].strip("\\")
                    it = iter(var)
                    #Find the real link among the fake ones
                    for tmp in it:
                        #Real link
                        if div_id in tmp:
                            tmp = it.next()
                            tmp = tmp.split('+')[0]
                            #Get the final dl var in the table
                            dl = table.split(tmp+"=")[1].split(";")[0].strip("'")
                #Result is encrypted
                else:
                    tmp = line.split("case 15:")[1]
                    tmp = tmp.split("break;")[0]
                    tmp = tmp.split("eval(")
                    #Decrypt until the real link is found
                    for t in tmp:
                        if "unescape" in t:
                            t = self.split_eval(t)
                            res = self.decrypt(t,div_id)
                            if len(res) == 3:
                                serv = res[0]
                                var = res[1]
                                name = res[2]
                                break
                    dl = table.split(var+"=")[1].split(";")[0].strip("'")
                url = "http://download%s.mediafire.com/%sg/%s/%s" % (serv,dl,qk,name)
                try:
                    handle = opener.open(url, None, content_range)
                except Exception, e:
                    return self.set_limit_exceeded()
                else:
    #NOTE(review): the definition ends on a dangling `else:` (and the
    #outer `try` has no except clause) -- this function is truncated;
    #recover the missing tail from version control.
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download handle for `url`.

    Handles an optional first wait (`var wf`), reloads the page up to 3
    times around server countdowns, and solves the reCAPTCHA; a
    non-HTML response to the captcha POST is the file itself and is
    returned.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        wait = WAIT
        opener = URLOpen()
        it = opener.open(url)
        first_wait = False
        #Check for first wait
        for line in it:
            if 'var wf =' in line:
                try:
                    wait = int(line.split("=")[1].split(";")[0].strip())
                    first_wait = True
                except Exception, e:
                    logger.exception("%s: %s" % (url, e))
                    return
                break
        #Necessary to loop to reload the page, due to the wait
        for loop in range(3):
            if not wait_func():
                return
            #First wait
            if first_wait:
                if not wait_func(wait):
                    return
                data = urllib.urlencode([("free", "Regular Download")])
                url = "%sbilling?%s" % (url,data)
                it = opener.open(url,data)
            #No first wait
            else:
                it = opener.open(url)
            for line in it:
                if 'name="id"' in line:
                    file_id = line.split('value="')[1].split('"')[0]
                elif 'id="dwait"' in line:
                    #Skip two lines; the third tells form vs. countdown.
                    it.next()
                    it.next()
                    tmp = it.next()
                    #The download is possible
                    if "form" in tmp:
                        form_action = tmp.split('action="')[1].split('"')[0]
                    #Necessary to wait
                    else:
                        it.next()
                        it.next()
                        wait = int(it.next().split("'")[1].split("'")[0])
                        if wait < 60:
                            if not wait_func(wait):
                                return
                            #Next loop, reload the page
                            break
                        else:
                            return self.set_limit_exceeded(wait)
                elif 'Recaptcha.create("' in line:
                    tmp = line.split('"')[1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([
                            ("recaptcha_challenge_field", challenge),
                            ("recaptcha_response_field", response),
                            ("recaptcha_shortencode_field", "undefined")])
                        handle = opener.open(form_action, form, content_range)
                        if not handle.info().getheader("Content-Type") == "text/html":
                            #Captcha is good
                            return handle
    #NOTE(review): the outer `try` has no except clause -- this
    #definition appears truncated; recover the missing tail from
    #version control.