def link_parser(self, url, wait_func, content_range=None): """""" #Remove the filename from the url tmp = url.split("/file/")[1].split("/")[0] url = "%s/file/%s" % (BASE_URL,tmp) link = None retry = 3 try: opener = URLOpen() for line in opener.open(url): if 'check:' in line: check = line.split("check:'")[1].replace("'","").strip() elif "Recaptcha.create" in line: tmp = line.split('("')[1].split('"')[0] recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp if not wait_func(): return c = Recaptcha(BASE_URL, recaptcha_link) while not link and retry: challenge, response = c.solve_captcha() if response: if not wait_func(): return #Filefactory perfoms as check on its server by doing an #Ajax request sending the following data form = urllib.urlencode([("recaptcha_challenge_field", challenge), ("recaptcha_response_field", response), ("recaptcha_shortencode_field", "undefined"),("check", check)]) url = "%s/file/checkCaptcha.php" % BASE_URL #Getting the result back, status:{"ok"|"fail"} for line in opener.open(url, form): if 'status:"ok"' in line: tmp = line.split('path:"')[1].strip('"') tmp_link = "%s%s" %(BASE_URL,tmp) for line in opener.open(tmp_link): if '<span class="countdown">' in line: #Try to get WAIT from the page try: tmp = line.split('"countdown">')[1].split("</span")[0] tmp = int(tmp) except ValueError: pass else: if tmp > 0: WAIT = tmp if "Download with FileFactory Basic" in line: link = line.split('<a href="')[1].split('"')[0] break retry -= 1 break if link: if not wait_func(WAIT): return return opener.open(link, None, content_range, True) except Exception, e: logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a Hotfile premium download handle for `url`.

    Asks the Hotfile API for the direct download link, authenticating
    with the digest query-string from self.get_cookie(), then opens
    that link and returns the handle.  Returns None if the caller
    cancels via wait_func.
    """
    auth_string = self.get_cookie()
    if not wait_func():
        return
    encoded_link = 'http://api.hotfile.com/?action=getdirectdownloadlink&link=' + url + auth_string
    #NOTE(review): this logs the account auth digest; consider
    #redacting auth_string or demoting to debug.
    logger.info("Encoded link %s" % (encoded_link))
    opener = URLOpen()
    handler = opener.open(encoded_link)
    #The API answers with the direct link on the first line; strip the
    #trailing newline so the follow-up request gets a clean URL (fix:
    #readline() includes the line terminator).
    actual_link = handler.readline().strip()
    return opener.open(actual_link)
def parse_wait(self, url): """""" link = None form = None wait = 0 found = False try: tmp_form = [] opener = URLOpen() for line in opener.open(url): if "download_file" in line: found = True elif found: if "method=post " in line: link = "%s%s" % (BASE_URL, line.split('action="')[1].split('" ')[0]) elif "name=action " in line: tmp_form.append(("action", line.split("value=")[1].split(">")[0])) elif "name=tm " in line: tmp_form.append(("tm", line.split("value=")[1].split(">")[0])) elif "name=tmhash " in line: tmp_form.append(("tmhash", line.split("value=")[1].split(">")[0])) elif "name=wait " in line: wait = int(line.split("value=")[1].split(">")[0]) tmp_form.append(("wait", wait)) elif "name=waithash " in line: tmp_form.append(("waithash", line.split("value=")[1].split(">")[0])) elif "name=upidhash " in line: tmp_form.append(("upidhash", line.split("value=")[1].split(">")[0])) found = False form = urllib.urlencode(tmp_form) except Exception, e: logger.exception("%s: %s" % (url, e))
def get_cookie(self, user, password, url=None):
    """Log in to oron.com and return the session CookieJar.

    Returns None when credentials are missing or the login produced no
    cookies.  `url` is accepted for interface compatibility and unused.
    """
    if user is None or password is None:
        return None
    jar = cookielib.CookieJar()
    login_form = urllib.urlencode({
        "password": password,
        "login": user,
        "rand": "",
        "redirect": "",
        "op": "login"})
    URLOpen(jar).open("http://www.oron.com/login", login_form)
    if len(jar):
        return jar
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download link for `url`.

    Uses parse_wait() to scrape the wait form, honours the countdown
    through `wait_func`, and solves the reCAPTCHA (up to 3 attempts)
    when the page asks for one.  wait_func returning falsy aborts.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    link = None
    retry = 3  # captcha attempts
    try:
        #Strip any query string from the file URL.
        if "?" in url:
            url = url.split("?")[0]
        tmp_link, tmp_form, wait = self.parse_wait(url)
        if not tmp_link or not tmp_form:
            #No form found: assume the download limit was hit.
            return self.set_limit_exceeded()
        elif not wait_func(wait):
            return
        else:
            opener = URLOpen(cookielib.CookieJar())
            it = opener.open(tmp_link, tmp_form)
            for line in it:
                if "function starthtimer(){" in line:
                    #A JS hold timer means the limit is exceeded; two
                    #lines below carries the delay in milliseconds.
                    it.next()
                    try:
                        tmp = int(it.next().split("+")[1].split(";")[0])
                        return self.set_limit_exceeded(int(tmp/1000))
                    except Exception, e:
                        logger.exception("%s: %s" % (url, e))
                        return
                elif "click_download" in line:
                    #Direct link already present on the page.
                    link = line.split('href="')[1].split('"')[0]
                    break
                elif "http://api.recaptcha.net/challenge" in line:
                    recaptcha_link = line.split('src="')[1].split('"')[0]
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    while not link and retry:
                        challenge, response = c.solve_captcha()
                        if response:
                            if not wait_func():
                                return
                            form = urllib.urlencode([
                                    ("action", "checkcaptcha"),
                                    ("recaptcha_challenge_field", challenge),
                                    ("recaptcha_response_field", response)])
                            for line in opener.open(tmp_link, form):
                                if "click_download" in line:
                                    link = line.split('href="')[1].split('"')[0]
                                    break
                        retry -= 1
                    break
    #NOTE(review): the definition ends here inside `try` with no except
    #clause and never opens/returns `link` -- the tail of this function
    #appears truncated; recover it from version control.
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a DepositFiles free-download link for `url`.

    Normalizes the URL, POSTs gateway_result=1, scrapes the countdown
    and the download-container URL, waits, then retries up to 10 times
    around the known "Invalid params" server bug.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        wait = WAIT
        link = None
        opener = URLOpen()
        #Transform the url into an english one
        url = "%s%s" % (BASE_URL, url.split("/files/")[1].split("/")[0])
        form = urllib.urlencode([('gateway_result','1')])
        for line in opener.open(url,form):
            #Try to get WAIT from the page
            if 'download_waiter_remain' in line:
                try:
                    tmp = line.split(">")[2].split("<")[0]
                    tmp = int(tmp)
                except Exception, e:
                    pass
                else:
                    if tmp > 0:
                        wait = tmp
            elif "$('#download_container').load('" in line:
                #The real download page is loaded via this AJAX call.
                try:
                    tmp = line.split("load('")[1].split("'")[0]
                    url = "%s%s" % ("http://depositfiles.com", tmp)
                except Exception, e:
                    pass
        if not wait_func(wait + 1):
            return
        #Due to a bug in DepositFiles, sometimes it returns "Invalid params"
        #If it's the case, retry, 10 times and set limit exceeded
        for attempt in range(10):
            for line in opener.open(url):
                if "Invalid" in line:
                    if not wait_func():
                        return
                    break
                elif "action" in line:
                    link = line.split('"')[1].split('"')[0]
                    break
            if link:
                break
    #NOTE(review): the body ends here inside `try` with no except
    #clause and never uses `link` -- this definition appears truncated;
    #recover the missing tail from version control.
def get_cookie(self, user, password, url=None):
    """Log in to fileserve.com and return the session CookieJar.

    Returns None when credentials are missing or login yielded no
    cookies.  `url` is accepted for interface compatibility and unused.
    """
    if user is None or password is None:
        return None
    jar = cookielib.CookieJar()
    login_form = urllib.urlencode({
        "loginUserName":user,
        "loginUserPassword":password,
        "autoLogin":"******",
        "recaptcha_response_field":"",
        "recaptcha_challenge_field":"",
        "recaptcha_shortencode_field":"",
        "loginFormSubmit":"Login"})
    URLOpen(jar).open("http://www.fileserve.com/login.php", login_form)
    if len(jar):
        return jar
def get_cookie(self, user, password, url=None):
    """Fetch a RapidShare premium session cookie via the account API.

    Queries getaccountdetails_v1 with the credentials; on success wraps
    the returned 'enc' value into a CookieJar and returns it.  Returns
    None when the API reports ERROR or no cookie line is present.
    """
    request = urllib.urlencode([("sub", "getaccountdetails_v1"),
                                ("type", "prem"),
                                ("login", user),
                                ("password", password),
                                ("withcookie", 1)])
    for reply_line in URLOpen().open(API_URL, request).readlines():
        if "ERROR" in reply_line:
            return
        elif "cookie" in reply_line:
            enc_value = reply_line.split("=")[1].strip()
            session_cookie = cookielib.Cookie(version=0, name='enc',
                    value=enc_value,
                    port=None, port_specified=False,
                    domain='.rapidshare.com', domain_specified=False,
                    domain_initial_dot=True,
                    path='/', path_specified=True,
                    secure=False, expires=None, discard=True,
                    comment=None, comment_url=None,
                    rest={'HttpOnly': None}, rfc2109=False)
            jar = cookielib.CookieJar()
            jar.set_cookie(session_cookie)
            return jar
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a FileServe free-download handle for `url`.

    Checks the time limit, solves the reCAPTCHA (up to 3 attempts),
    performs the wait/show/download POST sequence and returns the open
    handle.  `wait_func` is polled between steps so the caller can
    cancel.  Exceptions are logged and swallowed (returns None).
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        #Remove the filename from the url
        tmp = url.split("/file/")[1].split("/")[0]
        url = "%s/file/%s" % (BASE_URL, tmp)
        file_id = url.split("/")[-1].strip("/")
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        form = urllib.urlencode([("checkDownload", "check")])
        #If the limit is exceeded
        if '"fail":"timeLimit"' in opener.open(url, form).read():
            return self.set_limit_exceeded()
        it = opener.open(url)
        for line in it:
            if 'reCAPTCHA_publickey=' in line:
                tmp = line.split("'")[1].split("'")[0]
                recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                if not wait_func():
                    return
                c = Recaptcha(BASE_URL, recaptcha_link)
                for retry in range(3):
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([
                            ("recaptcha_challenge_field", challenge),
                            ("recaptcha_response_field", response),
                            ("recaptcha_shortencode_field", file_id)
                        ])
                        recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL
                        #Captcha is good
                        if "success" in opener.open(recaptcha_url, form).read():
                            #The wait reply ends with the countdown value.
                            form = urllib.urlencode([("downloadLink", "wait")])
                            wait = int(opener.open(url, form).read()[-2:])
                            if not wait_func(wait):
                                return
                            form = urllib.urlencode([("downloadLink", "show")])
                            opener.open(url, form).read()
                            form = urllib.urlencode([("download", "normal")])
                            return opener.open(url, form)  #,content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a RapidShare premium download handle for `url`.

    Opens the url with the account cookie; if the reply is HTML (no
    direct file), falls back to the download_v1 API to obtain the
    mirror host and opens that.  Exceptions are logged and swallowed.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        cookie = self.get_cookie()
        if not wait_func():
            return
        opener = URLOpen(cookie)
        handler = opener.open(url, None, content_range)
        if not wait_func():
            return
        if "text/html" in handler.info()["Content-Type"]:
            #HTML means no direct download: ask the API instead,
            #re-sending the session 'enc' cookie value explicitly.
            cookie_value = cookie._cookies[".rapidshare.com"]["/"]["enc"].value
            tmp = url.split("/")
            form = urllib.urlencode([("sub", "download_v1"),
                                     ("cookie", cookie_value),
                                     ("fileid", tmp[4]),
                                     ("filename", tmp[5])])
            for line in opener.open("http://api.rapidshare.com%s" % API_URL, form, content_range):
                if "DL:" in line:
                    #"DL:host,..." names the mirror serving the file.
                    tmp_url = "http://%s%s" % (line.split("DL:")[1].split(",")[0], API_URL)
                    return opener.open(tmp_url, form, content_range)
        else:
            return handler
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None): """""" try: tmp_link = None link = None wait = WAIT opener = URLOpen(cookielib.CookieJar()) it = opener.open(url) for line in it: if "dbtn" in line: tmp_link = line.split('href="')[1].split('"')[0] if tmp_link: it = opener.open(tmp_link) for line in it: if "id='divDLStart'" in line: link = it.next().split("<a href='")[1].split("'")[0] elif '<div class="sec">' in line: wait = int(line.split(">")[1].split("<")[0]) if not link: return elif not wait_func(wait): return except Exception, e: logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None): """""" try: link = None opener = URLOpen() form = urllib.urlencode([('download',' REGULAR DOWNLOAD ')]) for line in opener.open(url,form): if '<span id="spn_download_link">' in line: link = line.split('href="')[1].split('"')[0] if not link: return if not wait_func(): return except Exception, e: logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None): """""" found = False try: cookie = self.get_cookie() if not wait_func(): return opener = URLOpen(cookie) handler = opener.open(url, None, content_range) if not wait_func(): return else: return handler except Exception, e: logger.exception("%s: %s" % (url, e))
class PremiumCookie:
    """Builds the Hotfile API authentication query-string for premium accounts."""

    def __init__(self):
        """Create the opener used to fetch the one-time MD5 digest."""
        self.digestURL = URLOpen()

    def get_cookie(self, user, password, url=None):
        """Return the auth fragment '&username=...&passwordmd5dig=...&digest=...'.

        Fetches a one-time digest from the Hotfile API and double-hashes
        the password with it.  Returns None when credentials are
        missing.  `url` is accepted for interface compatibility.

        FIX: the username had been scrubbed out of the return expression
        ('&username='******'... is not valid Python); re-insert `user`.
        """
        if user == None or password == None:
            return None
        DigestURLHandler = self.digestURL.open('http://api.hotfile.com/?action=getdigest')
        # retrieve MD5 digest
        md5Digest = DigestURLHandler.readline()
        #TODO(review): md5Digest may carry a trailing newline from
        #readline(); confirm whether the API expects it stripped.
        md5pw = hashlib.md5(password).hexdigest()
        md5pw = hashlib.md5(md5pw + md5Digest).hexdigest()
        return '&username=' + user + '&passwordmd5dig=' + md5pw + '&digest=' + md5Digest
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a FileServe free-download handle for `url`.

    Checks the time limit, solves the reCAPTCHA (up to 3 attempts),
    performs the wait/show/download POST sequence and returns the open
    handle.  `wait_func` is polled between steps so the caller can
    cancel.  Exceptions are logged and swallowed (returns None).
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        #Remove the filename from the url
        tmp = url.split("/file/")[1].split("/")[0]
        url = "%s/file/%s" % (BASE_URL,tmp)
        file_id = url.split("/")[-1].strip("/")
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        form = urllib.urlencode([("checkDownload", "check")])
        #If the limit is exceeded
        if '"fail":"timeLimit"' in opener.open(url,form).read():
            return self.set_limit_exceeded()
        it = opener.open(url)
        for line in it:
            if 'reCAPTCHA_publickey=' in line:
                tmp = line.split("'")[1].split("'")[0]
                recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                if not wait_func():
                    return
                c = Recaptcha(BASE_URL, recaptcha_link)
                for retry in range(3):
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([
                                ("recaptcha_challenge_field", challenge),
                                ("recaptcha_response_field", response),
                                ("recaptcha_shortencode_field",file_id)])
                        recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL
                        #Captcha is good
                        if "success" in opener.open(recaptcha_url,form).read():
                            #The wait reply ends with the countdown value.
                            form = urllib.urlencode([("downloadLink", "wait")])
                            wait = int(opener.open(url,form).read()[-2:])
                            if not wait_func(wait):
                                return
                            form = urllib.urlencode([("downloadLink", "show")])
                            opener.open(url,form).read()
                            form = urllib.urlencode([("download", "normal")])
                            return opener.open(url,form)#,content_range)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
class PremiumCookie:
    """Builds the Hotfile API authentication query-string for premium accounts."""

    def __init__(self):
        """Create the opener used to fetch the one-time MD5 digest."""
        self.digestURL = URLOpen()

    def get_cookie(self, user, password, url=None):
        """Return the auth fragment '&username=...&passwordmd5dig=...&digest=...'.

        Fetches a one-time digest from the Hotfile API and double-hashes
        the password with it.  Returns None when credentials are
        missing.  `url` is accepted for interface compatibility.

        FIX: the username had been scrubbed out of the return expression
        ('&username='******'... is not valid Python); re-insert `user`.
        """
        if user == None or password == None:
            return None
        DigestURLHandler = self.digestURL.open(
            'http://api.hotfile.com/?action=getdigest')
        # retrieve MD5 digest
        md5Digest = DigestURLHandler.readline()
        #TODO(review): md5Digest may carry a trailing newline from
        #readline(); confirm whether the API expects it stripped.
        md5pw = hashlib.md5(password).hexdigest()
        md5pw = hashlib.md5(md5pw + md5Digest).hexdigest()
        return '&username=' + user + '&passwordmd5dig=' + md5pw + '&digest=' + md5Digest
def link_parser(self, url, wait_func, content_range=None): """""" link = None wait = 0 try: tmp = url.split("/") opener = URLOpen() url = "%s&fileid=%s" % (API_URL,tmp[4]) url = "%s&filename=%s" % (url,tmp[5]) for line in opener.open("http://%s%s" % ("api.rapidshare.com",url)): print line if "DL:" in line: tmp = line.split("DL:")[1].split(",") link = "http://%s%s&dlauth=%s" % (tmp[0],url,tmp[1]) wait = int(tmp[2]) print link if not wait_func(wait): return if link: return URLOpen().open(link, content_range) else: return self.set_limit_exceeded() except Exception, e: logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download link for a media url.

    Rewrites /video/, /audio/ or /image/ urls to /download/, POSTs the
    download form, decodes the obfuscated `link_enc` array into the
    real link and scrapes the countdown.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        link = None
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        if "/video/" in url:
            url = url.replace("/video/", "/download/")
        elif "/audio/" in url:
            url = url.replace("/audio/", "/download/")
        elif "/image/" in url:
            url = url.replace("/image/", "/download/")
        try:
            form = urllib.urlencode([("download", 1)])
            for line in opener.open(url,form):
                if 'link_enc=new Array' in line:
                    #The link is split into quoted single characters.
                    tmp = line.strip().split("var link_enc=new Array(")[1].split(");")[0]
                    link = tmp.replace("','","").replace("'","")
                #Try to get WAIT from the page
                if 'document|important' in line:
                    try:
                        tmp = line.split("here|")[1].split("|class")[0]
                        tmp = int(tmp)
                    except ValueError:
                        pass
                    else:
                        if tmp > 0:
                            #NOTE(review): assigning WAIT makes it local
                            #to this function in Python 2, so the later
                            #wait_func(WAIT) raises UnboundLocalError
                            #whenever this branch never runs -- latent bug.
                            WAIT = tmp
                            break
        except Exception, e:
            logger.exception("%s :%s" % (url, e))
        if not link:
            return
        if not wait_func(WAIT):
            return
    #NOTE(review): the body ends here inside the outer `try` with no
    #except clause and never opens/returns `link` -- this definition
    #appears truncated; recover the missing tail from version control.
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a Badongo free-download handle for `url`.

    Flow: fetch the OCR captcha, solve it locally with Tesseract (up to
    4 tries, with a digit-to-letter substitution heuristic), run the
    download:init / download:check AJAX handshake (honouring every
    'check_n' wait the server sends), then assemble the final two-part
    link and return the open handle.  Exceptions are logged and
    swallowed (returns None).
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        link = []  #One link at the end is in two parts
        captcha_url = None
        wait = WAIT
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        if not wait_func():
            return
        #Get the captcha url
        data = urllib.urlencode([("rs", "refreshImage"), ("rst", ""),
                                 ("rsrnd", int(time.time()))])
        tmp = opener.open(url, data).read().split("+:var res = '")[1].split("'; res;")[0].replace('\\"', '"')
        form_action = tmp.split('action="')[1].split('"')[0]
        cap_id = tmp.split('name=cap_id value=')[1].split('>')[0]
        cap_secret = tmp.split('name=cap_secret value=')[1].split('>')[0]
        captcha_url = "%s%s" % (BASE_URL, tmp.split('img src="')[1].split('"')[0])
        if captcha_url:
            solved = False
            cont = 0
            while (not solved) and cont < 4:
                tes = Tesseract(opener.open(captcha_url).read(), self.filter_image)
                captcha = tes.get_captcha()
                #Crack trick to optimize the OCR
                if len(captcha) == 4 and captcha.isalnum():
                    if not captcha.isalpha():
                        #Map commonly-confused digits back to letters.
                        for i, j in [("0", "O"),("1", "I"),("2", "Z"),("3", "B"),("4", "A"),("5", "S"),("6", "G"),("7", "T"),("8", "B"),("9", "B")]:
                            captcha = captcha.replace(i,j)
                    captcha = captcha.upper()
                #Captcha : 4 letters
                if len(captcha) == 4 and captcha.isalpha():
                    if not wait_func():
                        return
                    logger.info("Captcha: %s" % captcha)
                    data = urllib.urlencode([("user_code", captcha),
                                             ("cap_id",cap_id),
                                             ("cap_secret",cap_secret)])
                    it = opener.open(form_action, data)
                    z = None
                    h = None
                    for line in it:
                        if "'z':'I!" in line:
                            z = line.split("'z':'")[1].split("'")[0]
                            h = line.split("'h':'")[1].split("'")[0]
                        elif 'window.location.href = dlUrl' in line:
                            it.next()
                            link.append(it.next().split('"')[1].split('"')[0])
                            solved = True
                            #If there is this line, the captcha is good
                            break
                cont += 1
            #If the captcha is good
            if solved and z and h:
                logger.info("Good captcha")
                if not wait_func():
                    return
                data = urllib.urlencode([("id",form_action.split("/")[-1]), ("type","file"), ("ext",""),("f","download:init"),("z","zvar"),("h","hvar")])
                #zvar/hvar placeholders dodge urlencode escaping of z/h.
                data = data.replace("zvar",z).replace("hvar",h)
                #The referer needs to be specify
                res = opener.open("%s%s" % (BASE_URL,JS_URL), data,None,True,form_action)
                t = None
                wait = None
                z = None
                h = None
                for line in res:
                    if "'z'" in line:
                        z = line.split("'z': '")[1].split("'")[0]
                    elif "'h'" in line:
                        h = line.split("'h': '")[1].split("'")[0]
                    elif "'t'" in line:
                        t = line.split("'t': '")[1].split("'")[0]
                    elif "check_n" in line:
                        wait = int(line.split('[\'check_n\'] = "')[1].split('"')[0])
                if not wait:
                    wait = WAIT
                if not wait_func(wait):
                    return
                data = urllib.urlencode([("id",form_action.split("/")[-1]), ("type","file"), ("ext",""),("f","download:check"),("z","zvar"),("h","hvar"),("t",t)])
                data = data.replace("zvar",z).replace("hvar",h)
                res = opener.open("%s%s" % (BASE_URL,JS_URL), data,None,True,form_action)
                t = None
                z = None
                h = None
                #Sometimes it sends another check_n
                while True:
                    if not wait_func():
                        return
                    res = opener.open("%s%s" % (BASE_URL,JS_URL), data,None,True,form_action)
                    wait = None
                    for line in res:
                        if "check_n" in line:
                            wait = int(line.split("=")[1].split(";")[0])
                            break
                        elif "'z'" in line:
                            z = line.split("'z': '")[1].split("'")[0]
                        elif "'h'" in line:
                            h = line.split("'h': '")[1].split("'")[0]
                        elif "'t'" in line:
                            t = line.split("'t': '")[1].split("'")[0]
                    if not wait:
                        break
                    else:
                        if not wait_func(wait):
                            return
                if not wait_func():
                    return
                data = urllib.urlencode([("rs","getFileLink"),("rst",""),("rsrnd",int(time.time())),("rsargs[]","0"),("rsargs[]","yellow"),("rsargs[]","zvar"),("rsargs[]","hvar"),("rsargs[]",t),("rsargs[]","file"),("rsargs[]",form_action.split("/")[-1]),("rsargs[]","")])
                data = data.replace("zvar",z).replace("hvar",h)
                #This cookie needs to be added manually
                gflcur = cookielib.Cookie(version=0, name='_gflCur', value='0', port=None, port_specified=False, domain='www.badongo.com', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)
                cookie.set_cookie(gflcur)
                res = opener.open(form_action, data,None,True,form_action).readlines()
                tmp = res[0].split('onclick')[2].split('(')[1].split("')")[0].replace('\\','').strip("'")
                link.append(tmp)
                if not wait_func():
                    return
                #Second part (host) + first part (path) of the link.
                url = "%s%s?zenc=" %(link[1],link[0])
                res = opener.open(url, data,None,True,form_action)
                for line in res:
                    if "window.location.href = '" in line:
                        final_url = line.split("window.location.href = '")[1].split("'")[0]
                        break
                return opener.open("%s%s" % (BASE_URL,final_url), data,content_range,True,url)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a Badongo free-download handle for `url`.

    Fetches the OCR captcha, solves it with Tesseract (up to 4 tries
    with a digit-to-letter correction heuristic), performs the
    download:init / download:check AJAX handshake (honouring each
    'check_n' wait), then builds the final two-part link and returns
    the open handle.  Exceptions are logged and swallowed.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        link = []  #One link at the end is in two parts
        captcha_url = None
        wait = WAIT
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        if not wait_func():
            return
        #Get the captcha url
        data = urllib.urlencode([("rs", "refreshImage"), ("rst", ""),
                                 ("rsrnd", int(time.time()))])
        tmp = opener.open(url, data).read().split(
            "+:var res = '")[1].split("'; res;")[0].replace('\\"', '"')
        form_action = tmp.split('action="')[1].split('"')[0]
        cap_id = tmp.split('name=cap_id value=')[1].split('>')[0]
        cap_secret = tmp.split('name=cap_secret value=')[1].split('>')[0]
        captcha_url = "%s%s" % (BASE_URL,
                                tmp.split('img src="')[1].split('"')[0])
        if captcha_url:
            solved = False
            cont = 0
            while (not solved) and cont < 4:
                tes = Tesseract(
                    opener.open(captcha_url).read(), self.filter_image)
                captcha = tes.get_captcha()
                #Crack trick to optimize the OCR
                if len(captcha) == 4 and captcha.isalnum():
                    if not captcha.isalpha():
                        #Map commonly-confused digits back to letters.
                        for i, j in [("0", "O"), ("1", "I"), ("2", "Z"),
                                     ("3", "B"), ("4", "A"), ("5", "S"),
                                     ("6", "G"), ("7", "T"), ("8", "B"),
                                     ("9", "B")]:
                            captcha = captcha.replace(i, j)
                    captcha = captcha.upper()
                #Captcha : 4 letters
                if len(captcha) == 4 and captcha.isalpha():
                    if not wait_func():
                        return
                    logger.info("Captcha: %s" % captcha)
                    data = urllib.urlencode([("user_code", captcha),
                                             ("cap_id", cap_id),
                                             ("cap_secret", cap_secret)])
                    it = opener.open(form_action, data)
                    z = None
                    h = None
                    for line in it:
                        if "'z':'I!" in line:
                            z = line.split("'z':'")[1].split("'")[0]
                            h = line.split("'h':'")[1].split("'")[0]
                        elif 'window.location.href = dlUrl' in line:
                            it.next()
                            link.append(
                                it.next().split('"')[1].split('"')[0])
                            solved = True
                            #If there is this line, the captcha is good
                            break
                cont += 1
            #If the captcha is good
            if solved and z and h:
                logger.info("Good captcha")
                if not wait_func():
                    return
                data = urllib.urlencode([
                    ("id", form_action.split("/")[-1]),
                    ("type", "file"),
                    ("ext", ""),
                    ("f", "download:init"),
                    ("z", "zvar"),
                    ("h", "hvar")
                ])
                #zvar/hvar placeholders dodge urlencode escaping of z/h.
                data = data.replace("zvar", z).replace("hvar", h)
                #The referer needs to be specify
                res = opener.open("%s%s" % (BASE_URL, JS_URL), data,
                                  None, True, form_action)
                t = None
                wait = None
                z = None
                h = None
                for line in res:
                    if "'z'" in line:
                        z = line.split("'z': '")[1].split("'")[0]
                    elif "'h'" in line:
                        h = line.split("'h': '")[1].split("'")[0]
                    elif "'t'" in line:
                        t = line.split("'t': '")[1].split("'")[0]
                    elif "check_n" in line:
                        wait = int(
                            line.split('[\'check_n\'] = "')
                            [1].split('"')[0])
                if not wait:
                    wait = WAIT
                if not wait_func(wait):
                    return
                data = urllib.urlencode([
                    ("id", form_action.split("/")[-1]),
                    ("type", "file"),
                    ("ext", ""),
                    ("f", "download:check"),
                    ("z", "zvar"),
                    ("h", "hvar"),
                    ("t", t)
                ])
                data = data.replace("zvar", z).replace("hvar", h)
                res = opener.open("%s%s" % (BASE_URL, JS_URL), data,
                                  None, True, form_action)
                t = None
                z = None
                h = None
                #Sometimes it sends another check_n
                while True:
                    if not wait_func():
                        return
                    res = opener.open("%s%s" % (BASE_URL, JS_URL), data,
                                      None, True, form_action)
                    wait = None
                    for line in res:
                        if "check_n" in line:
                            wait = int(
                                line.split("=")[1].split(";")[0])
                            break
                        elif "'z'" in line:
                            z = line.split("'z': '")[1].split(
                                "'")[0]
                        elif "'h'" in line:
                            h = line.split("'h': '")[1].split(
                                "'")[0]
                        elif "'t'" in line:
                            t = line.split("'t': '")[1].split(
                                "'")[0]
                    if not wait:
                        break
                    else:
                        if not wait_func(wait):
                            return
                if not wait_func():
                    return
                data = urllib.urlencode([
                    ("rs", "getFileLink"),
                    ("rst", ""),
                    ("rsrnd", int(time.time())),
                    ("rsargs[]", "0"),
                    ("rsargs[]", "yellow"),
                    ("rsargs[]", "zvar"),
                    ("rsargs[]", "hvar"),
                    ("rsargs[]", t),
                    ("rsargs[]", "file"),
                    ("rsargs[]", form_action.split("/")[-1]),
                    ("rsargs[]", "")
                ])
                data = data.replace("zvar", z).replace("hvar", h)
                #This cookie needs to be added manually
                gflcur = cookielib.Cookie(version=0, name='_gflCur',
                        value='0', port=None, port_specified=False,
                        domain='www.badongo.com', domain_specified=False,
                        domain_initial_dot=False, path='/',
                        path_specified=True, secure=False, expires=None,
                        discard=True, comment=None, comment_url=None,
                        rest={'HttpOnly': None}, rfc2109=False)
                cookie.set_cookie(gflcur)
                res = opener.open(form_action, data, None, True,
                                  form_action).readlines()
                tmp = res[0].split('onclick')[2].split(
                    '(')[1].split("')")[0].replace('\\', '').strip("'")
                link.append(tmp)
                if not wait_func():
                    return
                #Second part (host) + first part (path) of the link.
                url = "%s%s?zenc=" % (link[1], link[0])
                res = opener.open(url, data, None, True, form_action)
                for line in res:
                    if "window.location.href = '" in line:
                        final_url = line.split(
                            "window.location.href = '")[1].split(
                            "'")[0]
                        break
                return opener.open("%s%s" % (BASE_URL, final_url), data,
                                   content_range, True, url)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """
    Oron links usually look like this:
    http://www.oron.com/file_id/file_name.foo.html
    However, by testing it seems that the server pulls the file name out
    by using the file_id, which is some sort of hash.  So the same file
    can aswell be accessed by:
    http://www.oron.com/file_id/file_name.foo.html.html and
    http://www.oron.com/file_id/file_name.foo.html.html(.html)*
    So we use check_links to get the file name form the HTML page, its
    slower, but more accurate as we cannot rely on the url passed here.

    Flow: POST the "Regular Download" form, pick up the hidden `rand`
    token, honour the countdown (or parse a longer hour/minute/second
    limit message), solve the reCAPTCHA and return the final handle.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    file_id = url.split("/")[3]
    file_name = self.check_links(url)[0]
    encoded_str = urllib.urlencode({
        "op" : "download1",
        "usr_login" : "",
        "id" : file_id,
        "name" : file_name,
        "referer" : "",
        "method_free" : "+Regular+Download+"})
    opener = URLOpen()
    """ The url we are currently trying to open is the origin
    (referring) URL preceding the post """
    web_page = opener.open(url, encoded_str, False, url)
    for retry in range(3):
        if not wait_func():
            return
        for line in web_page:
            if '<input type="hidden" name="rand" value="' in line:
                rand_value = line.split('value="')[1].split('"')[0]
                break
        #NOTE(review): rand_value is unbound (NameError) if the pattern
        #never matched; it should be initialised to None before the loop.
        if not rand_value:
            logger.warning("Oron Plugin: No random value in download page- template changed?");
            #NOTE(review): `wait` is undefined here -- this call raises
            #NameError instead of reporting the limit.
            return self.set_limit_exceeded(wait)
        for line in web_page:
            if '<span id="countdown">' in line:
                wait_length = line.split('<span id="countdown">')[1].split('<')[0]
                if not wait_func(int(wait_length)):
                    return
            """ Check for longer limits """
            if '<p class="err"' in line:
                #Parse "X hour(s), Y minute(s), Z second(s)" prose into
                #a total number of seconds.
                parse_line = line.split('>')[1].split('<')[0]
                seconds = 0
                minutes = 0
                hours = 0
                prev_word = ''
                for word in parse_line.split(' '):
                    if word == 'hour,' or word == 'hours,':
                        hours = int(prev_word)
                    elif word == 'minute,' or word == 'minutes,':
                        minutes = int(prev_word)
                    elif word == 'second' or word == 'seconds':
                        seconds = int(prev_word)
                        break
                    else:
                        prev_word = word
                seconds = seconds + (minutes * 60) + (hours * 3600)
                return self.set_limit_exceeded(seconds)
            if 'http://api.recaptcha.net/challenge?' in line:
                recaptcha_link = line.split('src="')[1].split('"')[0]
                if not wait_func():
                    return
                c = Recaptcha(BASE_URL, recaptcha_link)
                challenge, response = c.solve_captcha()
                if response:
                    if not wait_func():
                        return
                    #Submit the input to the recaptcha system
                    form = urllib.urlencode({
                        "op" : "download2",
                        "id" : file_id,
                        "rand" : rand_value,
                        "referer" : url,
                        "method_free" : "+Regular+Download+",
                        "method_premium" : "",
                        "recaptcha_challenge_field" : challenge,
                        "recaptcha_response_field" : response,
                        "down_direct" : 1 })
                    download_page = opener.open(url, form, None, False, url)
                    #Get the link and return it
                    for line in download_page:
                        if 'Download File' in line:
                            return opener.open(line.split('href="')[1].split('"')[0])
    return
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download handle for `url`.

    Handles an optional first wait (`var wf`), reloads the page up to 3
    times around the server-imposed countdowns, and solves the
    reCAPTCHA; a non-HTML response to the captcha POST is the file
    itself and is returned.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        wait = WAIT
        opener = URLOpen()
        it = opener.open(url)
        first_wait = False
        #Check for first wait
        for line in it:
            if 'var wf =' in line:
                try:
                    wait = int(line.split("=")[1].split(";")[0].strip())
                    first_wait = True
                except Exception, e:
                    logger.exception("%s: %s" % (url, e))
                    return
                break
        #Necessary to loop to reload the page, due to the wait
        for loop in range(3):
            if not wait_func():
                return
            #First wait
            if first_wait:
                if not wait_func(wait):
                    return
                data = urllib.urlencode([("free", "Regular Download")])
                url = "%sbilling?%s" % (url, data)
                it = opener.open(url, data)
            #No first wait
            else:
                it = opener.open(url)
            for line in it:
                if 'name="id"' in line:
                    file_id = line.split('value="')[1].split('"')[0]
                elif 'id="dwait"' in line:
                    #Skip two lines; the third tells form vs. countdown.
                    it.next()
                    it.next()
                    tmp = it.next()
                    #The download is possible
                    if "form" in tmp:
                        form_action = tmp.split('action="')[1].split(
                            '"')[0]
                    #Necessary to wait
                    else:
                        it.next()
                        it.next()
                        wait = int(it.next().split("'")[1].split("'")[0])
                        if wait < 60:
                            if not wait_func(wait):
                                return
                            #Next loop, reload the page
                            break
                        else:
                            return self.set_limit_exceeded(wait)
                elif 'Recaptcha.create("' in line:
                    tmp = line.split('"')[1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([
                            ("recaptcha_challenge_field", challenge),
                            ("recaptcha_response_field", response),
                            ("recaptcha_shortencode_field", "undefined")
                        ])
                        handle = opener.open(form_action, form, content_range)
                        if not handle.info().getheader(
                                "Content-Type") == "text/html":
                            #Captcha is good
                            return handle
    #NOTE(review): the outer `try` has no except clause -- this
    #definition appears truncated; recover the missing tail from
    #version control.
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download handle for `url`.

    Follows the ?start=1 redirect to find the mirror TLD, forces an
    XMLHttpRequest POST to the download endpoint, then loops up to 3
    times over countdowns/tm-token forms until a captcha or a direct
    'downloadLink' appears.  Exceptions are logged and swallowed.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        file_id = url.split("/")[-2]
        form_action = "%s?start=1" % (url)
        if not wait_func():
            return
        it = opener.open(form_action)
        form_action = "%s?start=1" % it.geturl()  #Get the redirect url
        end = form_action.split(".")[2].split("/")[0]  #Get the .com replacement
        form_action2 = "%s/%s/%s?start=1" % (BASE_URL,file_id,file_id)
        form_action2 = form_action2.replace(".com",".%s" % end)
        form = urllib.urlencode([("foo","foo")])  #Force urllib2 to do a POST
        #FIXME : urlopen should be able to set custom headers
        headers = {"User-Agent": cons.USER_AGENT,
                   "X-Requested-With": "XMLHttpRequest"}
        it = opener.opener.open(urllib2.Request(form_action2, None, headers), form)
        it_tmp = None
        #Loop until we get the captcha
        for loop in range(3):
            if not wait_func():
                return
            #it_tmp is set after a wait
            if it_tmp:
                it = it_tmp
            for line in it:
                if 'Recaptcha.create("' in line:
                    tmp = line.split('"')[1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    for retry in range(3):
                        challenge, response = c.solve_captcha()
                        if response:
                            if not wait_func():
                                return
                            #Submit the input to the recaptcha system
                            form = urllib.urlencode([
                                ("recaptcha_challenge_field", challenge),
                                ("recaptcha_response_field", response)])
                            it = opener.open(form_action, form)
                            #Get the link
                            for line in it:
                                if 'downloadLink' in line:
                                    it.next()
                                    return opener.open(it.next().split('href="')[1].split('"')[0])
                #Link already there
                #NOTE(review): duplicate of the condition handled inside
                #the captcha branch above; verify both paths are needed.
                elif 'downloadLink' in line:
                    it.next()
                    return opener.open(it.next().split('href="')[1].split('"')[0])
                #Need to wait
                elif "name='tm'" in line:
                    tm = line.split("value='")[1].split("'")[0];
                    tm_hash = it.next().split("value='")[1].split("'")[0];
                    form = urllib.urlencode([("tm", tm), ("tm_hash", tm_hash)])
                #Need to wait
                elif "countDownDelay =" in line:
                    wait = int(line.split("=")[1].split(";")[0])
                    if wait < 60:
                        if not wait_func(wait):
                            return
                        it_tmp = opener.open(form_action, form)  #fetch the page
                        #Next loop, reload the page
                        break
                    else:
                        return self.set_limit_exceeded(wait)
    except Exception, e:
        logger.exception("%s: %s" % (url, e))
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a MediaFire free-download handle for `url`.

    Decrypts the obfuscated javascript on the file page (via the
    plugin's split_eval/decrypt helpers) to find the div id holding the
    real link among decoys, queries dynamic/download.php, decodes the
    final link table and opens the resulting download URL.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        pkr = None
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)
        res = ""
        #Open the first page
        page = opener.open(url).readlines()
        for line in page:
            #Get pKr
            if "pKr='" in line:
                pkr = line.split("'")[1].split("'")[0]
            #Get the last block to unescape
            if "unescape" in line:
                tmp = line.split("break;}")[-1]
                tmp = tmp.split("var cb")[0]
                tmp = self.split_eval(tmp)
                #Eval the block until it's plain text
                res = self.decrypt(tmp)
        id_func = res.split("(")[0]  #Name of the function containig the id refering to the div that contains the real link
        pk1 = res.split("'")[3].split("'")[0]
        qk = res.split("'")[1].split("'")[0]  #Public ID of the file
        it = iter(page)
        for line in it:
            #Line containing the function to parse
            if id_func in line:
                #Try to get the crypted block
                tmp = line.split(id_func)[1].split("setTimeout")[0].split('"none";')[1]
                tmp = self.split_eval(tmp)
                #Eval until it's plain text
                res = self.decrypt(tmp)
                div_id = res.split('getElementById("')[1].split('"')[0]
                data = urllib.urlencode([("qk",qk), ("pk1", pk1), ("r", pkr),])
                form_action = "http://www.mediafire.com/dynamic/download.php?%s" %data
                #Parse the GET
                res = opener.open(form_action, data)
                line = " ".join(res)
                #Long line containing the js
                if "var" in line:
                    #Decrypt the table containig the final dl var
                    tmp = line.split("function dz()")[0].split(";")[2:-1]
                    tmp = ";".join(tmp)
                    tmp = self.split_eval(tmp)
                    table = self.decrypt(tmp)
                #Result is plain text
                if "http://download" in line:
                    #Get all the dl links (even the fake ones)
                    var = line.split('mediafire.com/" +')
                    #Get the number of the server
                    serv = line.split("http://download")[1].split(".")[0]
                    #Get the name of the file
                    name = var[1].split('+')[1].split("/")[2].split('"')[0].strip("\\")
                    it = iter(var)
                    #Find the real link among the fake ones
                    for tmp in it:
                        #Real link
                        if div_id in tmp:
                            tmp = it.next()
                            tmp = tmp.split('+')[0]
                            #Get the final dl var in the table
                            dl = table.split(tmp+"=")[1].split(";")[0].strip("'")
                #Result is encrypted
                else:
                    tmp = line.split("case 15:")[1]
                    tmp = tmp.split("break;")[0]
                    tmp = tmp.split("eval(")
                    #Decrypt until the real link is found
                    for t in tmp:
                        if "unescape" in t:
                            t = self.split_eval(t)
                            res = self.decrypt(t,div_id)
                            if len(res) == 3:
                                serv = res[0]
                                var = res[1]
                                name = res[2]
                                break
                    dl = table.split(var+"=")[1].split(";")[0].strip("'")
                url = "http://download%s.mediafire.com/%sg/%s/%s" % (serv,dl,qk,name)
                try:
                    handle = opener.open(url, None, content_range)
                except Exception, e:
                    return self.set_limit_exceeded()
                else:
    #NOTE(review): the definition ends on a dangling `else:` (and the
    #outer `try` has no except clause) -- this function is truncated;
    #recover the missing tail from version control.
def link_parser(self, url, wait_func, content_range=None):
    """Resolve a free-download handle for `url`.

    Handles an optional first wait (`var wf`), reloads the page up to 3
    times around server countdowns, and solves the reCAPTCHA; a
    non-HTML response to the captcha POST is the file itself and is
    returned.
    """
    #NOTE(review): reflowed from a whitespace-mangled source; the
    #indentation below is reconstructed.
    try:
        wait = WAIT
        opener = URLOpen()
        it = opener.open(url)
        first_wait = False
        #Check for first wait
        for line in it:
            if 'var wf =' in line:
                try:
                    wait = int(line.split("=")[1].split(";")[0].strip())
                    first_wait = True
                except Exception, e:
                    logger.exception("%s: %s" % (url, e))
                    return
                break
        #Necessary to loop to reload the page, due to the wait
        for loop in range(3):
            if not wait_func():
                return
            #First wait
            if first_wait:
                if not wait_func(wait):
                    return
                data = urllib.urlencode([("free", "Regular Download")])
                url = "%sbilling?%s" % (url,data)
                it = opener.open(url,data)
            #No first wait
            else:
                it = opener.open(url)
            for line in it:
                if 'name="id"' in line:
                    file_id = line.split('value="')[1].split('"')[0]
                elif 'id="dwait"' in line:
                    #Skip two lines; the third tells form vs. countdown.
                    it.next()
                    it.next()
                    tmp = it.next()
                    #The download is possible
                    if "form" in tmp:
                        form_action = tmp.split('action="')[1].split('"')[0]
                    #Necessary to wait
                    else:
                        it.next()
                        it.next()
                        wait = int(it.next().split("'")[1].split("'")[0])
                        if wait < 60:
                            if not wait_func(wait):
                                return
                            #Next loop, reload the page
                            break
                        else:
                            return self.set_limit_exceeded(wait)
                elif 'Recaptcha.create("' in line:
                    tmp = line.split('"')[1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if not wait_func():
                        return
                    c = Recaptcha(BASE_URL, recaptcha_link)
                    challenge, response = c.solve_captcha()
                    if response:
                        if not wait_func():
                            return
                        #Submit the input to the recaptcha system
                        form = urllib.urlencode([
                            ("recaptcha_challenge_field", challenge),
                            ("recaptcha_response_field", response),
                            ("recaptcha_shortencode_field", "undefined")])
                        handle = opener.open(form_action, form, content_range)
                        if not handle.info().getheader("Content-Type") == "text/html":
                            #Captcha is good
                            return handle
    #NOTE(review): the outer `try` has no except clause -- this
    #definition appears truncated; recover the missing tail from
    #version control.