def get_page(self, link, form=None):
    #return source code.
    if self.is_running():
        try:
            with URLClose(request.url_open(link, self.cookie, form)) as s:
                return s.read(BUFF_SZ)
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            logger.debug(link)
            raise ParsingError(err)
def check(self, link):
    """Check a turbobit link; return (link_status, name, size, status_msg)."""
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    #for retry_count in range(RETRIES):
    api = "http://turbobit.net/linkchecker/csv"
    form = urllib.urlencode({"links_to_check": link, })
    try:
        with URLClose(URLOpen().open(api, form)) as s:
            line = s.read()
            active = int(line.split(", ")[-1].strip())
        if active:
            link_status = cons.LINK_ALIVE
            with URLClose(URLOpen().open(link)) as s:
                for line in s:
                    if 'class="download-file' in line:
                        s.next()
                        line_ = s.next()
                        name = line_.split("'>")[-1].split("<")[0]
                        tmp = line_.split('>')[-1].strip()
                        size = float(tmp.split("(")[-1].split(",")[0])
                        unit = tmp.split(" ")[-1].split(")")[0]
                        #convert size to bytes.
                        if unit.lower() == "kb":
                            size = size * 1024
                        elif unit.lower() == "mb":
                            size = size * 1024 * 1024
                        elif unit.lower() == "gb":
                            size = size * 1024 * 1024 * 1024
                        break
        if link_status != cons.LINK_ALIVE:
            link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
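#The kb/mb/gb branches above are repeated by almost every check() in this
#section. A minimal sketch of a helper they could share; "_to_bytes" is a
#hypothetical name, not part of the original code.
def _to_bytes(size, unit):
    """Convert a (value, unit) pair such as (1.5, "MB") to bytes."""
    multipliers = {"kb": 1024, "mb": 1024 ** 2, "gb": 1024 ** 3}
    return size * multipliers.get(unit.lower(), 1)  #unknown units pass through unchanged

#eg: _to_bytes(1.5, "MB") -> 1572864.0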
def add(self):
    #the get_login methods are in premium_accounts.py
    """
    Wupload API (the download link only stays active briefly, so it is not used):
    http://api.wupload.com/link?method=getDownloadLink&u=USER-EMAIL&p=PASSWORD&ids=1234

    try:
        tmp = link.split("/file/")[1].split("/")[0]
        link = "%s/file/%s" % (BASE_URL, tmp)
        file_id = link.split("/")[-1].strip("/")
        url = "http://api.wupload.com/link?method=getDownloadLink"
        dict_form = {"u": username, "p": password, "ids": file_id}
        with URLClose(URLOpen().open(url, urllib.urlencode(dict_form),
                range=(content_range, None)), always_close=False) as s:
            rsp = s.read()
            rsp_dict = json.loads(rsp)
            link_list = rsp_dict["FSApi_Link"]["getDownloadLink"]["response"]["links"]
            link_dict = link_list[0]
            link_file = link_dict["url"]  #http:\/\/s74.wupload.com\/apidownload\/
            link_file = link_file.replace("\\", "")
            #link_file = url + "&u=" + username + "&p=" + password + "&ids=" + file_id
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except Exception as err:
        logger.exception(err)
        err_msg = err
    """
    link_file = None
    err_msg = None
    source = None
    cookie = self.get_cookie()
    try:
        status = cookie._cookies['.wupload.com']['/']['role'].value
    except Exception as err:
        logger.exception(err)
        cookie = None
        status = None
    if cookie and status == "premium":  #login success
        try:
            with URLClose(URLOpen(cookie).open(self.link,
                    range=(self.content_range, None)), always_close=False) as s:
                source = s
                #link_file = s.url
                link_file = self.link
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            err_msg = err
    return cookie, link_file, source, err_msg  #source may be the file object or None.
def get_solved_captcha(url, cookie, filter=None):
    """
    @params: filter = a function wrapping one or more clean_image functions.
    """
    try:
        with URLClose(request.get(url, cookie=cookie)) as s:
            image_data = s.read()
        t = Tesseract(image_data, filter)
        result = t.get_captcha()
    except Exception as err:
        logger.exception(err)
        return None
    else:
        return result
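#get_solved_captcha() above accepts a "filter" callable that cleans the captcha
#image before OCR. A hedged sketch of such a filter, assuming it receives and
#returns raw image bytes (the exact contract lives in the Tesseract wrapper);
#it uses PIL to grayscale and binarize the image.
import StringIO

from PIL import Image

def example_filter(image_data):
    """Grayscale and threshold the captcha image to help Tesseract."""
    img = Image.open(StringIO.StringIO(image_data)).convert("L")  #grayscale
    img = img.point(lambda px: 255 if px > 128 else 0)  #hard black/white threshold
    out = StringIO.StringIO()
    img.save(out, "PNG")
    return out.getvalue()

#usage: get_solved_captcha(url, cookie, filter=example_filter)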
def get_cookie(self):  #cookie_handler
    """
    Usage:
        if cookie is not None: #connection success
            if cookie: #login success
            else: #login fail
        else: #server down
    """
    #for retry in range(COOKIE_CONNECTION_RETRY):
    url = "http://www.filesonic.com/user/login"
    dict_form = {
        "email": self.username,
        "redirect": "/",
        "password": self.password
    }
    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "application/json"
    }
    try:
        cookie = cookielib.CookieJar()
        with URLClose(URLOpen(cookie).open(url, urllib.urlencode(dict_form),
                                           headers=headers)) as s:
            #eg: url = login-url, data = {"login": "******", "redir": "1",
            #"username": user, "password": password}
            rsp_json = s.read()
            #print rsp_json
            #try:
            #    dict_json = cjson.decode(rsp_json)
            #    print dict_json["status"]
            #except:
            #    pass
            #status = cookie._cookies['.filesonic.com']['/']['role'].value  #anonymous, free or premium
            #if status == "anonymous":  #login fail
            #    return []
    except Exception as err:
        #this only happens on http errors, not on a bad login, etc.
        logger.warning(err)
        #host_down = True
    else:
        return cookie
    return None  #server down, can't connect.
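#A minimal sketch of a caller honoring the three-state contract documented in
#get_cookie() above ("account" is a hypothetical plugin instance; the cons.*
#names mirror the ones used by get_account_status() in this section).
def example_login_check(account):
    cookie = account.get_cookie()
    if cookie is None:
        return cons.ACCOUNT_ERROR  #server down, can't connect
    if not cookie:
        return cons.ACCOUNT_FAIL  #connected, but the login failed
    return account.get_account_status(cookie)  #free or premium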
def get_page(self, link, form=None, close=True):
    #return source code.
    if self.is_running():
        link = utils.url_unescape(link)
        range = (None, None) if close else (self.content_range, None)
        try:
            with URLClose(request.url_open(link, self.cookie, form, range), close) as s:
                if close:
                    return s.read(BUFF_SZ)
                else:
                    self.dl_link = link
                    return s
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            logger.debug(link)
            raise ParsingError(err)
def check(self, link):
    """
    Rapidshare api: http://images.rapidshare.com/apidoc.txt
    Status integer, which can have the following numeric values:
    0=File not found
    1=File OK
    3=Server down
    4=File marked as illegal
    5=Direct download
    """
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    FILE_ID, FILE_NAME, SIZE, SERVER_ID, STATUS, SHORT_HOST, MD5 = range(7)
    try:
        id = link.split("/")[-2]
        file_name = link.split("/")[-1]
        #http://api.rapidshare.com/cgi-bin/rsapi.cgi?sub=subroutine&files=value1&filenames=value2
        with URLClose(request.post("http://api.rapidshare.com/cgi-bin/rsapi.cgi",
                                   data={"sub": "checkfiles",
                                         "files": id,
                                         "filenames": file_name},
                                   timeout=10)) as s:
            tmp = s.read().split(",")
        #print tmp
        name = tmp[FILE_NAME]
        size = int(tmp[SIZE])
        if int(tmp[STATUS]) in (1, 5):  #alive or direct download
            link_status = cons.LINK_ALIVE
        elif int(tmp[STATUS]) == 3:  #server down
            link_status = cons.LINK_UNAVAILABLE
        else:
            link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
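#Every check() in this section returns the same 4-tuple; a short usage sketch
#("plugin" is a hypothetical host-plugin instance):
def example_report(plugin, link):
    link_status, name, size, status_msg = plugin.check(link)
    if link_status == cons.LINK_ALIVE:
        print "%s (%d bytes)" % (name, size)
    elif status_msg is not None:
        print status_msg  #network or parsing error detail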
def check(self, link):
    """Check a youtube link; return (link_status, title, size, status_msg)."""
    video_id = link.split("&")[0].split("=")[-1]
    for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
        video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s'
                          '&ps=default&eurl=&gl=US&hl=en' % (video_id, el_type))
        with URLClose(request.get(video_info_url)) as s:
            video_info = parse_qs(s.read())
        if 'token' in video_info:
            #print video_info
            #print video_info_url
            break
    video_title = urllib.unquote_plus(video_info['title'][0])
    return cons.LINK_ALIVE, video_title, 0, None
def check(self, link):
    """Check the link; return (link_status, name, size, status_msg)."""
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    #for retry_count in range(RETRIES):
    try:
        with URLClose(request.get(link)) as s:
            alive = False
            for line in s:
                if '<title>' in line:
                    tmp = line.split("-")
                    if len(tmp) > 2:
                        tmp_name = link.split("/files/")[-1].split("/")
                        if len(tmp_name) == 2:
                            name = tmp_name[-1].rstrip(".html")  #complete name
                        else:
                            name = tmp[0].strip().split(" ")[-1]  #shortened name, ie: filenam...part1.rar
                        link_status = cons.LINK_ALIVE
                        alive = True
                    else:
                        link_status = cons.LINK_DEAD
                elif alive and "<h1>" in line and name in line:
                    tmp = line.split("-")[-1].strip()
                    unit = tmp.split(" ")[-1]
                    size = float(tmp.split(" ")[0])
                    #convert size to bytes.
                    if "kb" in unit.lower():
                        size = size * 1024
                    elif "mb" in unit.lower():
                        size = size * 1024 * 1024
                    elif "gb" in unit.lower():
                        size = size * 1024 * 1024 * 1024
                    break
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Check a filefactory link; return (link_status, name, size, status_msg)."""
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    try:
        #strip file name
        tmp = link.split("/file/")[1].split("/")[0]
        link = "%s/file/%s" % (BASE_URL, tmp)
        link_quoted = urllib.quote_plus(link)
        with URLClose(request.get("http://www.filefactory.com/tool/links.php?func=links&links="
                                  + link_quoted, timeout=10)) as s:
            alive = False
            for line in s:
                if 'Available' in line:
                    alive = True
                elif alive:
                    if 'class="metadata"' in line:
                        name = line.split('class="metadata">')[-1].split('</div>')[0].split('/')[-1].strip()
                        name = html_entities_parser(name)
                        s.next()
                        size_list = s.next().split("<td>")[-1].split("</td>")[0].split(" ")
                        #size = "".join(size_list)
                        size = int(float(size_list[0]))
                        link_status = cons.LINK_ALIVE
                        break
        if link_status != cons.LINK_ALIVE:
            link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
        logger.warning(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Check the link; return (link_status, name, size, status_msg)."""
    name = cons.UNKNOWN
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    #for retry_count in range(RETRIES):
    try:
        if "/video/" in link:
            link = link.replace("/video/", "/download/")
        elif "/audio/" in link:
            link = link.replace("/audio/", "/download/")
        elif "/image/" in link:
            link = link.replace("/image/", "/download/")
        with URLClose(URLOpen().open(link)) as s:
            for line in s:
                if 'File Name:' in line:
                    name = s.next().split("</font>")[0].split('>')[-1].strip()
                    name = misc.html_entities_parser(name)
                elif 'File Size:' in line:
                    tmp = line.split("</font>")[0].split('>')[-1].strip()
                    unit = tmp[-2:].strip()
                    size = float(tmp[:-2])
                    #convert size to bytes.
                    if unit.lower() == "kb":
                        size = size * 1024
                    elif unit.lower() == "mb":
                        size = size * 1024 * 1024
                    elif unit.lower() == "gb":
                        size = size * 1024 * 1024 * 1024
                    break
        if size:
            link_status = cons.LINK_ALIVE
        else:
            link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        name, size = cons.UNKNOWN, 0
        logger.exception(err)
    return link_status, name, size, status_msg
def get_account_status(self, cookie):
    """
    Wupload API.
    """
    try:
        url = "http://api.wupload.com/user?method=getInfo"
        dict_form = {"u": self.username, "p": self.password}
        with URLClose(URLOpen().open(url, urllib.urlencode(dict_form))) as s:
            rsp = s.read()
        rsp_dict = json.loads(rsp)
        #if rsp_dict["FSApi_User"]["getInfo"]["status"] == "success":
        is_premium = rsp_dict["FSApi_User"]["getInfo"]["response"]["users"]["user"]["is_premium"]
        if is_premium:
            return cons.ACCOUNT_PREMIUM
        else:
            return cons.ACCOUNT_FREE
    except KeyError as err:
        return cons.ACCOUNT_FAIL
    except Exception as err:  #ValueError: json exception.
        logger.exception(err)
        return cons.ACCOUNT_ERROR
def get_account_status(self, cookie):
    """Return the account type for the given cookie (see the get_cookie usage notes)."""
    try:
        if cookie is not None:  #None: could not connect
            if cookie:  #empty jar: login failed
                with URLClose(URLOpen(cookie).open("http://www.megaupload.com")) as s:
                    #close the connection on exception or when done reading.
                    premium = False
                    for line in s:
                        if 'class="stars_' in line:
                            premium = True
                            break
                if premium:
                    return cons.ACCOUNT_PREMIUM
                else:
                    return cons.ACCOUNT_FREE
            else:
                return cons.ACCOUNT_FAIL
        else:
            return cons.ACCOUNT_ERROR
    except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
        return cons.ACCOUNT_ERROR
def check(self, link):
    """Check the link; return (link_status, name, size, status_msg)."""
    name = "Unknown"
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    #for retry_count in range(RETRIES):
    try:
        with URLClose(request.get(link, timeout=10)) as s:
            for line in s:
                if 'class="f_arial f_14px"' in line:
                    name = line.split('"f_arial f_14px">')[-1].split('<')[0].strip()
                    name = misc.html_entities_parser(name)
                    tmp = s.next().split(":")[-1].split("<")[0].strip()
                    unit = tmp.split(" ")[-1].strip()
                    size = float(tmp.split(" ")[0].strip())
                    #convert size to bytes.
                    if unit.lower() == "kb":
                        size = size * 1024
                    elif unit.lower() == "mb":
                        size = size * 1024 * 1024
                    elif unit.lower() == "gb":
                        size = size * 1024 * 1024 * 1024
                    break
        if size:
            link_status = cons.LINK_ALIVE
        else:
            link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        status_msg = "Error: {0}".format(err)
        name, size = cons.UNKNOWN, 0
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Check a megaupload link; return (link_status, name, size, status_msg)."""
    name = cons.UNKNOWN
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    #for retry_count in range(RETRIES):
    try:
        id = link.split("d=")[-1].strip()
        if "&" in id:
            id = id.split("&")[0]
        #TODO: send the cookie.
        #urllib.urlencode: encode the dict for the POST.
        #http://www.megaupload.com/mgr_linkcheck.php&id0=id
        with URLClose(URLOpen().open("http://www.megaupload.com/mgr_linkcheck.php",
                                     form=urllib.urlencode({"id0": id}))) as s:
            tmp = s.read().split("&")  #returns a list.
        if len(tmp) > 4:
            name = "&".join(tmp[5:]).split("n=")[1]
            size = int(tmp[3].split("s=")[1])
            link_status = cons.LINK_ALIVE
        elif tmp[2] == 'id0=3':  #id0=1 dead, id0=3 unavailable, id0=0 alive
            #name = "Unknown"
            #size = None
            link_status = cons.LINK_UNAVAILABLE
            status_msg = "Temporarily Unavailable. You can add the file anyway (it will be downloaded later)"  #not used, yet.
        else:
            link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
        #if isinstance(err.reason, socket.timeout):
        #    break
    except Exception as err:
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Check the link; return (link_status, name, size, status_msg)."""
    name = cons.UNKNOWN
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    #for retry_count in range(RETRIES):
    try:
        with URLClose(URLOpen().open(link)) as s:
            for line in s:
                if 'name="description"' in line:
                    name = line.split('content="')[-1].split(" | Free file hosting")[0]
                    name = misc.html_entities_parser(name)
                elif "File Size:</b>" in line:
                    tmp = line.split("</b>")[-1].split("</div>")[0].strip()
                    unit = tmp[-2:]
                    size = float(tmp[:-2])
                    #convert size to bytes.
                    if unit == "KB":
                        size = size * 1024
                    elif unit == "MB":
                        size = size * 1024 * 1024
                    elif unit == "GB":
                        size = size * 1024 * 1024 * 1024
                    break
        if size:
            link_status = cons.LINK_ALIVE
        else:
            link_status, name, size = cons.LINK_DEAD, cons.UNKNOWN, 0
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        name, size = cons.UNKNOWN, 0
        logger.exception(err)
    return link_status, name, size, status_msg
def check(self, link):
    """Check the link; return (link_status, name, size, status_msg)."""
    name = cons.UNKNOWN
    size = 0
    status_msg = None
    link_status = cons.LINK_ERROR
    #for retry_count in range(RETRIES):
    try:
        with URLClose(request.get(link)) as s:
            found = False
            for line in s:
                if 'download_file_title">' in line:
                    found = True
                    link_status = cons.LINK_ALIVE
                    name = line.split('download_file_title">')[-1].split('<')[0].strip()
                    tmp = line.split('class="download_link')[1].split('<span>(')[-1].split(')')[0].strip()
                    unit = tmp.split(" ")[-1].strip()
                    size = float(tmp.split(" ")[0].strip())
                    #convert size to bytes.
                    if unit.lower() == "kb":
                        size = size * 1024
                    elif unit.lower() == "mb":
                        size = size * 1024 * 1024
                    elif unit.lower() == "gb":
                        size = size * 1024 * 1024 * 1024
                    break
        if not found:
            link_status = cons.LINK_DEAD
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        status_msg = "Error: {0}".format(err)
    except Exception as err:
        logger.exception(err)
    return link_status, name, size, status_msg
def add(self):
    #wait_func: wait method from thread_managed
    """
    TODO: Refactor.
    """
    link_file = None
    err_msg = None
    source = None
    wait = WAITING
    try:
        file_id = self.link.split("turbobit.net/")[-1].split("/")[0].rstrip(".html")
        self.link = BASE_URL + "/download/free/" + file_id
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)  #cookielib
        with URLClose(opener.open(self.link)) as s1:
            key = None
            for line in s1:
                if "challenge?k=" in line:
                    key = line.split('challenge?k=')[-1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % key
                    c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                    break
        if key is None:
            raise LimitExceededException("Limit Exceeded")
        for retry in range(3):
            challenge, response = c.solve_captcha()
            if response is not None:
                form = urllib.urlencode([("recaptcha_challenge_field", challenge),
                                         ("recaptcha_response_field", response),
                                         ("captcha_type", "recaptcha"),
                                         ("captcha_subtype", "")])
                with URLClose(opener.open(self.link, form)) as s2:
                    found = False
                    for line in s2:
                        if "limit :" in line:
                            found = True
                            try:
                                wait = int(line.split(":")[-1].split(",")[0]) / 100  #ms
                            except Exception as err:
                                logger.exception(err)
                                wait = WAITING
                        elif "captcha-error" in line:
                            err_msg = "Wrong captcha"
                    if found:
                        if self.wait_func(wait + 1):
                            return self.link, None, err_msg
                        url = BASE_URL + "/download/getLinkAfterTimeout/" + file_id
                        print url
                        with URLClose(opener.open(url)) as s3:
                            for line in s3:
                                print line
                                if "href='/download/redirect" in line:
                                    tmp = line.split("href='")[-1].split("'")[0]
                                    redir_url = BASE_URL + tmp
                                    print redir_url
                                    with URLClose(opener.open(redir_url)) as s4:
                                        for line in s4:
                                            if 'href="' in line:
                                                link_file = line.split('href="')[-1].split('"')[0]
                                                #print link_file
                                                with URLClose(opener.open(link_file,
                                                        range=(self.content_range, None)),
                                                        always_close=False) as s5:
                                                    source = s5
                                                    raise FileLinkFoundException()
            else:
                raise CaptchaException("No response from the user")
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (FileLinkFoundException, LimitExceededException, LinkErrorException,
            CaptchaException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        err_msg = err
        logger.info(err)
    except Exception as err:
        logger.exception(err)
        err_msg = err
    return link_file, source, err_msg
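#The solve-and-retry captcha loop above reappears, with small variations, in
#every add() of this section. A sketch of the shared shape; "post_solution"
#and "success_marker" are hypothetical stand-ins for each host's specifics.
def solve_captcha_with_retries(c, post_solution, success_marker, retries=3):
    """c: a Recaptcha instance; post_solution(challenge, response) -> page text."""
    for retry in range(retries):
        challenge, response = c.solve_captcha()
        if response is None:
            raise CaptchaException("No response from the user")
        page = post_solution(challenge, response)
        if success_marker in page:
            return page  #solved; the caller continues with the download flow
    raise CaptchaException("Captcha, max retries reached")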
def add(self):
    """Fetch the download link; return (self.link, source, err_msg)."""
    link_file = None
    err_msg = None
    source = None
    wait = WAITING
    max_retries = 3
    try:
        #Remove the filename from the url
        tmp = self.link.split("/file/")[1].split("/")[0]
        self.link = "%s/file/%s" % (BASE_URL, tmp)
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)  #cookielib
        with URLClose(opener.open(self.link)) as s:
            if self.wait_func():
                return self.link, None, err_msg
            for line in s:
                if 'check:' in line:
                    check = line.split("check:'")[1].replace("'", "").strip()
                elif "Recaptcha.create" in line:
                    tmp = line.split('("')[1].split('"')[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                    for retry in range(3):
                        if self.wait_func():
                            return self.link, None, err_msg
                        if retry < (max_retries + 1):
                            challenge, response = c.solve_captcha()
                            if response is not None:
                                #Filefactory performs a check on its server by doing an
                                #Ajax request sending the following data
                                form = urllib.urlencode([
                                    ("recaptcha_challenge_field", challenge),
                                    ("recaptcha_response_field", response),
                                    ("recaptcha_shortencode_field", "undefined"),
                                    ("check", check)])
                                recaptcha_link = "%s/file/checkCaptcha.php" % BASE_URL
                                #Getting the result back, status:{"ok"|"fail"}
                                with URLClose(opener.open(recaptcha_link, form)) as sa:
                                    if self.wait_func():
                                        return self.link, None, err_msg
                                    for line in sa:
                                        if 'status:"ok"' in line:
                                            tmp = line.split('path:"')[-1].split('"')[0]
                                            tmp_link = "%s%s" % (BASE_URL, tmp)
                                            with URLClose(opener.open(tmp_link)) as sb:
                                                if self.wait_func():
                                                    return self.link, None, err_msg
                                                for line in sb:
                                                    if 'countdown">' in line:
                                                        #Try to get WAIT from the page
                                                        try:
                                                            tmp = line.split('countdown">')[-1].split("<")[0]
                                                            tmp = int(tmp)
                                                            if tmp > 320:
                                                                raise LimitExceededException("Limit Exceeded")
                                                        except ValueError:
                                                            pass
                                                        else:
                                                            if tmp > 0:
                                                                wait = tmp
                                                    if 'id="downloadLinkTarget' in line:
                                                        link_file = line.split('<a href="')[1].split('"')[0]
                                                        if self.wait_func(wait):
                                                            return self.link, None, err_msg
                                                        with URLClose(opener.open(link_file,
                                                                range=(self.content_range, None)),
                                                                always_close=False) as sc:
                                                            try:
                                                                if sc.status == 302:  #redirect error 302.
                                                                    raise RedirectException("Redirection error")
                                                            except AttributeError as err:
                                                                #not redirected.
                                                                source = sc
                                                                raise FileLinkFoundException()
                            else:
                                raise CaptchaException("No response from the user")
                        else:
                            raise CaptchaException("Captcha, max retries reached")
            raise LinkNotFoundException()
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (RedirectException, FileLinkFoundException, LinkNotFoundException,
            CaptchaException, LimitExceededException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        elif not isinstance(err, FileLinkFoundException):
            logger.info(err)
        err_msg = err
    except Exception as err:
        logger.exception(err)
    return self.link, source, err_msg  #source may be the file object or None.
def add(self):
    #wait_func: wait method from thread_managed
    """
    TODO: Refactor.
    """
    link_file = None
    err_msg = None
    source = None
    wait = None
    found = False
    try:
        #Remove the filename from the url
        tmp = self.link.split("/file/")[1].split("/")[0]
        self.link = "%s/file/%s" % (BASE_URL, tmp)
        file_id = self.link.split("/")[-1].strip("/")
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)  #cookielib
        #form = urllib.urlencode([("checkTimeLimit", "check")])  #deprecated by fileserve
        form = urllib.urlencode([("checkDownload", "check")])
        post_result = opener.open(self.link, form).read()
        if "success" not in post_result:
            if "timeLimit" in post_result:
                raise LimitExceededException("Limit Exceeded")
            else:
                raise LinkErrorException("Link Error")
        with URLClose(opener.open(self.link)) as s:
            for line in s:
                if 'reCAPTCHA_publickey=' in line:
                    tmp = line.split("'")[1].split("'")[0]
                    recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                    if self.wait_func():  #wait... if true: download was stopped
                        return self.link, None, err_msg
                    c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                    for retry in range(3):
                        if self.wait_func():  #wait... if true: download was stopped
                            return self.link, None, err_msg
                        challenge, response = c.solve_captcha()
                        if response is not None:
                            #Submit the input to the recaptcha system
                            form = urllib.urlencode([
                                ("recaptcha_challenge_field", challenge),
                                ("recaptcha_response_field", response),
                                ("recaptcha_shortencode_field", file_id)])
                            recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL
                            #Captcha is good
                            #on error: {"success":0,"error":"incorrect-captcha-sol"}
                            #on success: {"success":1}
                            with URLClose(opener.open(recaptcha_url, form)) as sa:
                                if "error" not in sa.read():
                                    form = urllib.urlencode([("downloadLink", "wait")])
                                    with URLClose(opener.open(self.link, form)) as sb:
                                        wait = int(sb.read()[-2:])  #sometimes gives fail404
                                    if self.wait_func(wait):  #wait... if true: download was stopped
                                        return self.link, None, err_msg
                                    form = urllib.urlencode([("downloadLink", "show")])
                                    with URLClose(opener.open(self.link, form)) as sc:
                                        if self.wait_func():  #wait... if true: download was stopped
                                            return self.link, None, err_msg
                                        sc.read()
                                    form = urllib.urlencode([("download", "normal")])
                                    with URLClose(opener.open(self.link, form,
                                            range=(self.content_range, None)),
                                            always_close=False) as sd:
                                        if sd.url == self.link:
                                            #link not found or weird countdown issue
                                            #logger.debug(sd.read())
                                            raise LinkErrorException("Link Error, redirected")
                                        else:
                                            source = sd  #,content_range)
                                            break
                                else:
                                    err_msg = "Wrong captcha"
                        else:
                            raise CaptchaException("No response from the user")
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (LimitExceededException, LinkErrorException, CaptchaException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        err_msg = err
        logger.info(err)
    except Exception as err:
        logger.exception(err)
        err_msg = err
    return self.link, source, err_msg  #source may be the file object or None.
def thread_download(self, fh, i, chunk, is_first):
    # the first thread won't retry.
    # a chunk that is already downloading won't retry.
    # not downloading and not first: should retry.
    is_downloading = False
    buf = StringIO()
    len_buf = 0

    def flush():
        self.flush_buffer(fh, i, chunk, buf, len_buf)

    try:
        with URLClose(self.get_source(chunk, is_first)) as s:
            if not is_first and not self.is_valid_range(s, chunk[START]):
                raise BadSource('Link expired, or cannot download the requested range.')
            with self.lock2:
                if self.chunks_control[i]:
                    self.chunks_control[i] = False
                    self.conn_count += 1
                    is_downloading = True
                else:
                    raise CanNotRun('Another thread has taken over this chunk.')
            while True:
                data = s.read(NT_BUFSIZ)
                len_data = len(data)
                buf.write(data)
                len_buf += len_data
                chunk = (chunk[START] + len_data, chunk[END])
                if len_buf >= DATA_BUFSIZ:
                    flush()
                    buf = StringIO()
                    len_buf = 0
                with self.lock2:
                    self.size_complete += len_data
                if self.bucket.rate:
                    nap = self.bucket.consume(len_data)
                    if nap:  # avoid thread switching if nap == 0
                        time.sleep(nap)
                if self.stop_flag or self.error_flag:
                    return
                if not len_data or (chunk[END] and chunk[START] >= chunk[END]):  # end may be 0
                    flush()
                    buf = StringIO()
                    len_buf = 0
                    logger.debug("complete {0} {1}".format(chunk[START], chunk[END]))
                    if not self.is_chunk_complete(chunk):
                        raise IncompleteChunk('Incomplete chunk')
                    chunk = self.dl_next_chunk(chunk, i + 1)
                    logger.debug("keep dl {0} {1}".format(chunk[START], chunk[END]))
                    i += 1
    except IncompleteChunk as err:  # propagate
        self.set_err(err)
    except (BadSource, CanNotRun) as err:  # do not propagate
        logger.debug(err)
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        if is_first or is_downloading:  # propagate
            self.set_err(err)
        else:
            logger.debug(err)  # retry?
    except EnvironmentError as err:  # propagate
        self.set_err(err)
    finally:
        if is_downloading:
            flush()
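#thread_download() above throttles through self.bucket.consume(), which returns
#how long the caller should sleep. A minimal token-bucket sketch with that
#contract, assuming "rate" is in bytes per second (the real implementation
#lives elsewhere in the codebase).
import time

class ExampleBucket(object):
    def __init__(self, rate):
        self.rate = rate  #bytes per second; a falsy rate disables throttling
        self.tokens = 0.0
        self.last = time.time()

    def consume(self, n):
        """Spend n tokens; return the seconds to sleep to honor the rate."""
        now = time.time()
        #refill, capping the burst at one second's worth of tokens
        self.tokens = min(self.rate, self.tokens + (now - self.last) * self.rate)
        self.last = now
        self.tokens -= n
        if self.tokens < 0:
            return -self.tokens / self.rate  #sleep until the deficit refills
        return 0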
def add(self):
    #wait_func: wait method from thread_managed
    """
    http://api.wupload.com/
    """
    link_file = None
    err_msg = None
    source = None
    wait = WAITING
    found = False
    try:
        #Remove the filename from the url
        tmp = self.link.split("/file/")[1].split("/")[0]
        self.link = "%s/file/%s" % (BASE_URL, tmp)
        file_id = self.link.split("/")[-1].strip("/")
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)  #cookielib
        #form = urllib.urlencode([("checkTimeLimit", "check")])  #deprecated by fileserve
        if self.wait_func():  #wait... if true: download was stopped
            return self.link, None, err_msg
        """
        form_action = "{0}?start=1".format(link)
        it = opener.open(form_action)
        form_action = "{0}?start=1".format(it.geturl())  #get redirect url
        #end = form_action.split(".")[2].split("/")[0]  #get .com replacement
        form_action2 = "{0}/{1}?start=1".format(link, file_id)
        #form_action2 = form_action2.replace(".com", end)
        form = urllib.urlencode([("foo", "foo")])  #force urllib2 to do a post
        headers = {"X-Requested-With": "XMLHttpRequest", }
        """
        it = opener.open(self.link)
        form_action = "{0}?start=1".format(it.geturl())
        form = urllib.urlencode({})
        headers = {"X-Requested-With": "XMLHttpRequest", }
        with URLClose(opener.open(form_action, form, headers=headers)) as s:
            if self.wait_func():  #wait... if true: download was stopped
                return self.link, None, err_msg
            #when there is a countdown, the page needs to be reloaded and the
            #captcha searched for again.
            for countdown in range(3):
                for line in s:
                    if 'Recaptcha.create("' in line:
                        tmp = line.split('"')[1].split('"')[0]
                        recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % tmp
                        if self.wait_func():  #wait... if true: download was stopped
                            return self.link, None, err_msg
                        c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                        for retry in range(3):
                            challenge, response = c.solve_captcha()
                            if self.wait_func():  #wait... if true: download was stopped
                                return self.link, None, err_msg
                            if response is not None:
                                #Submit the input to the recaptcha system
                                form = urllib.urlencode([
                                    ("recaptcha_challenge_field", challenge),
                                    ("recaptcha_response_field", response)])
                                #recaptcha_url = "%s/checkReCaptcha.php" % BASE_URL
                                with URLClose(opener.open(form_action, form)) as sa:
                                    for line in sa:
                                        if 'downloadLink' in line:
                                            sa.next()
                                            link_file = sa.next().split('href="')[-1].split('"')[0]
                                            with URLClose(opener.open(link_file,
                                                    range=(self.content_range, None)),
                                                    always_close=False) as sd:
                                                source = sd  #,content_range)
                                                raise FileLinkFoundException()
                            else:
                                raise CaptchaException("No response from the user")
                            if retry == 2:
                                raise CaptchaException("Captcha, max retries reached")
                    #Link already there O.o
                    elif 'downloadLink' in line:
                        s.next()
                        link_file = s.next().split('href="')[-1].split('"')[0]
                        with URLClose(opener.open(link_file,
                                range=(self.content_range, None)),
                                always_close=False) as sd:
                            source = sd  #,content_range)
                            raise FileLinkFoundException()
                    #waiting... ?
                    elif "name='tm'" in line:
                        tm = line.split("value='")[-1].split("'")[0]
                        tm_hash = s.next().split("value='")[-1].split("'")[0]
                        form = urllib.urlencode([("tm", tm), ("tm_hash", tm_hash)])
                    #waiting...
                    elif "var countDownDelay" in line:
                        wait = int(line.split("=")[1].split(";")[0])
                        if wait < 60:
                            if self.wait_func(wait):
                                return self.link, None, err_msg
                            #fetch the page again, but posting the tm, tm_hash
                            s = opener.open(form_action, form)
                            break
                        else:
                            raise LimitExceededException("Limit Exceeded")
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (FileLinkFoundException, CaptchaException, LimitExceededException,
            LinkNotFoundException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        elif not isinstance(err, FileLinkFoundException):
            logger.info(err)
        err_msg = err
    except Exception as err:
        logger.exception(err)
        err_msg = err
    return self.link, source, err_msg  #source may be the file object or None.
def add(self):
    #wait_func: wait method from thread_managed
    """
    TODO: Refactor.
    """
    link_file = None
    err_msg = None
    source = None
    wait = WAITING
    try:
        if "/ul.to/" in self.link:
            file_id = self.link.split("/ul.to/")[-1].split("/")[0]
        else:
            file_id = self.link.split("/file/")[-1].split("/")[0]
        self.link = BASE_URL + "/file/" + file_id
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)  #cookielib
        with URLClose(opener.open(self.link)) as s1:
            if self.wait_func():
                return self.link, None, err_msg
            for line in s1:
                if 'class="free' in line:
                    try:
                        s1.next()
                        wait = int(s1.next().split("<span>")[-1].split("</span>")[0])
                    except Exception as err:
                        logger.exception(err)
                        wait = WAITING
                    break
        form = urllib.urlencode({})
        form_url = BASE_URL + "/io/ticket/slot/" + file_id
        with URLClose(opener.open(form_url, form)) as s2:
            s = s2.read()
        if "succ:true" in s:
            if self.wait_func(wait):
                return self.link, None, err_msg
            js_url = BASE_URL + "/js/download.js"
            with URLClose(opener.open(js_url)) as s3:
                if self.wait_func():
                    return self.link, None, err_msg
                for line in s3:
                    if 'Recaptcha.create("' in line:
                        key = line.split('Recaptcha.create("')[-1].split('"')[0].strip()
                        break
            print key
            recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % key
            c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
            for retry in range(3):
                challenge, response = c.solve_captcha()
                if response is not None:
                    form_url = BASE_URL + "/io/ticket/captcha/" + file_id
                    form = urllib.urlencode([
                        ("recaptcha_challenge_field", challenge),
                        ("recaptcha_response_field", response)])
                    with URLClose(opener.open(form_url, form)) as s4:
                        if self.wait_func():
                            return self.link, None, err_msg
                        s = s4.read()
                    if "download" in s:
                        link_file = s.split("url:'")[-1].split("'")[0]
                        print link_file
                        with URLClose(opener.open(link_file,
                                range=(self.content_range, None)),
                                always_close=False) as s5:
                            source = s5
                            raise FileLinkFoundException()
                    elif "limit-dl" in s:
                        raise LimitExceededException("Limit Exceeded")
                    else:  #{err:"captcha"}
                        print s
                        err_msg = "Wrong captcha"
                else:
                    raise CaptchaException("No response from the user")
        else:
            raise LinkErrorException("Link not found")
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (FileLinkFoundException, LimitExceededException, LinkErrorException,
            CaptchaException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        err_msg = err
        logger.info(err)
    except Exception as err:
        logger.exception(err)
        err_msg = err
    return link_file, source, err_msg
def add(self):
    """Fetch the download link; return (link_file, source, err_msg)."""
    try:
        link_file = None
        err_msg = None
        source = None
        cookie = cookielib.CookieJar()
        form = None
        max_retries = 3
        for retry in range(max_retries + 1):
            try:
                file_id = self.link.split(".com/?")[-1].split("/")[0]
                with URLClose(URLOpen(cookie).open(self.link, form)) as s:
                    if self.wait_func():
                        return self.link, None, err_msg
                    s_lines = s.readlines()
                    for line in s_lines:
                        if 'class="download_link' in line:
                            div_list = line.split('<div')
                            tmp_list = [div for div in div_list
                                        if 'class="download_link' in div]
                            tmp_list = [ref_tag for ref_tag in tmp_list
                                        if file_id in ref_tag]
                            link_file = tmp_list[0].split('href="')[1].split('"')[0]
                        #Recaptcha
                        if "challenge?k=" in line:
                            if retry < (max_retries + 1):
                                recaptcha_key = line.split("challenge?k=")[-1].split('"')[0]
                                recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % recaptcha_key
                                c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                                challenge, response = c.solve_captcha()
                                if response is not None:
                                    #Submit the input to the recaptcha system
                                    form = urllib.urlencode([
                                        ("recaptcha_challenge_field", challenge),
                                        ("recaptcha_response_field", response),
                                        ("downloadp", "")])
                                    raise PostCaptchaException("Post captcha solution")
                                else:
                                    raise CaptchaException("No response from the user")
                            else:
                                raise CaptchaException("Captcha, max retries reached")
            except PostCaptchaException as err:
                pass
            else:
                break
        if link_file is not None:
            with URLClose(URLOpen(cookie).open(link_file,
                    range=(self.content_range, None)), always_close=False) as s:
                source = s
                print link_file
        else:
            raise LinkNotFoundException("Link not found")
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        print err
        err_msg = err
    except (CaptchaException, LinkNotFoundException) as err:
        print err
        err_msg = err
        logging.exception(err)
    except Exception as err:
        err_msg = err
        print err
        logging.exception(err)
    return link_file, source, err_msg  #source may be the file object or None.
def add(self):
    """
    TODO: Refactor.
    """
    link_file = None
    err_msg = None
    source = None
    wait = WAITING
    try:
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)  #cookielib
        #url parse
        if "file_id" in self.link:  #most likely not.
            file_id = self.link.split("file_id=")[-1].split("&")[0]
        else:
            file_id = self.link.split("netload.in/datei")[-1].split("/")[0].split(".")[0]
        self.link = BASE_URL + "/" + "index.php?id=10&file_id=" + file_id
        with URLClose(opener.open(self.link)) as s1:
            if self.wait_func():
                return self.link, None, err_msg
            for line in s1:
                if 'class="Free_dl' in line:
                    id = line.split("?id=")[-1].split("&")[0]
                    url = BASE_URL + "/" + line.split('href="')[-1].split('"')[0].replace("&amp;", "&")
                    break
        with URLClose(opener.open(url)) as s2:
            for line in s2:
                if "captcha.php" in line:
                    captcha_url = BASE_URL + "/" + line.split('src="')[-1].split('"')[0]
                elif ">countdown(" in line:
                    try:
                        wait = int(line.split(">countdown(")[-1].split(",")[0]) / 100  #ms
                    except Exception as err:
                        logger.exception(err)
                        wait = WAITING
        if self.wait_func(wait + 1):
            return self.link, None, err_msg
        captcha_result = tesseract.get_solved_captcha(captcha_url, cookie, self.filter)
        form = urllib.urlencode([("file_id", file_id),
                                 ("captcha_check", captcha_result),
                                 ("start", "")])
        captcha_form_url = BASE_URL + "/" + "index.php?id=" + id
        with URLClose(opener.open(captcha_form_url, form)) as s3:
            for line in s3:
                if ">countdown(" in line:
                    try:
                        wait = int(line.split(">countdown(")[-1].split(",")[0]) / 100  #ms
                    except Exception as err:
                        logger.exception(err)
                        wait = WAITING
                elif 'class="Orange_Link' in line:
                    link_file = line.split('href="')[-1].split('"')[0]
        if wait > 600:  # 10 minutes
            raise LimitExceededException("Limit exceeded")
        if self.wait_func(wait + 1):
            return self.link, None, err_msg
        with URLClose(opener.open(link_file, range=(self.content_range, None)),
                      always_close=False) as s4:
            source = s4
            raise FileLinkFoundException()
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (FileLinkFoundException, LimitExceededException, LinkErrorException,
            CaptchaException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        err_msg = err
        logger.info(err)
    except Exception as err:
        logger.exception(err)
        err_msg = err
    return link_file, source, err_msg
def add(self):
    #wait_func: wait method from thread_managed
    """Fetch the download link; return (link_file, source, err_msg)."""
    link_file = None
    err_msg = None
    source = None
    wait = WAITING
    try:
        file_id = self.link.split("/files/")[1].split("/")[0]
        self.link = BASE_URL + "/files/" + file_id
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)  #cookielib
        form = urllib.urlencode([("gateway_result", "1"), ])
        with URLClose(opener.open(self.link)) as s1:
            if self.wait_func():
                return self.link, None, err_msg
            s1.read()
        key = None
        fid = None
        with URLClose(opener.open(self.link, form)) as s2:
            for line in s2:
                if 'download_waiter_remain">' in line:
                    wait = int(line.split('download_waiter_remain">')[-1].split('<')[0])
                elif "Recaptcha.create('" in line:
                    key = line.split("Recaptcha.create('")[-1].split("'")[0]
                elif "var fid" in line:
                    fid = line.split("'")[1]
                elif 'limit_interval">' in line:
                    wait = int(line.split('limit_interval">')[-1].split("<")[0])
                    if wait > 320:
                        raise LimitExceededException("Limit Exceeded")
        if self.wait_func(wait):
            return self.link, None, err_msg
        if key is not None:
            recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % key
            c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
            for retry in range(3):
                if self.wait_func():  #wait... if true: download was stopped
                    return self.link, None, err_msg
                challenge, response = c.solve_captcha()
                if response is not None:
                    recaptcha_url = (BASE_URL + "/get_file.php?fid=" + fid +
                                     "&challenge=" + challenge +
                                     "&response=" + response)
                    with URLClose(opener.open(recaptcha_url)) as s3:
                        if self.wait_func():
                            return self.link, None, err_msg
                        for line in s3:
                            if 'form action="' in line and not "recaptcha" in line:
                                link_file = line.split('form action="')[-1].split('"')[0]
                                #print link_file
                                with URLClose(opener.open(link_file,
                                        range=(self.content_range, None)),
                                        always_close=False) as s4:
                                    source = s4
                                    raise FileLinkFoundException()
                    err_msg = "Wrong captcha"
                else:
                    raise CaptchaException("No response from the user")
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (FileLinkFoundException, LimitExceededException, LinkErrorException,
            CaptchaException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        err_msg = err
        logger.info(err)
    except Exception as err:
        logger.exception(err)
        err_msg = err
    return link_file, source, err_msg
def add(self):
    """Fetch the download link; return (link_file, source, err_msg)."""
    link_file = None
    err_msg = None
    source = None
    wait = WAITING
    try:
        #Remove the filename from the url
        file_id = self.link.split("/oron.com/")[1].split("/")[0]
        self.link = "%s/%s" % (BASE_URL, file_id)
        cookie = cookielib.CookieJar()
        with URLClose(URLOpen(cookie).open(self.link)) as s:
            if self.wait_func():
                return self.link, None, err_msg
            fname = None
            for line in s:
                if 'name="fname"' in line:
                    fname = line.split('value="')[-1].split('"')[0]
        if fname is not None:
            dict_form = {
                "op": "download1",
                "usr_login": "",
                "id": file_id,
                "fname": fname,
                "referer": "",
                "method_free": " Regular Download "
            }
            headers = {"Content-type": "application/x-www-form-urlencoded", }
            with URLClose(URLOpen(cookie).open(self.link, urllib.urlencode(dict_form),
                                               headers=headers)) as sa:
                if self.wait_func():
                    return self.link, None, err_msg
                rand = None
                referer = None
                recaptcha_key = None
                for line in sa:
                    if 'id="countdown"' in line:
                        wait = int(line.split('id="countdown">')[-1].split('<')[0].strip())
                    elif 'name="rand"' in line:
                        rand = line.split('value="')[-1].split('"')[0]
                    elif 'name="referer"' in line:
                        referer = line.split('value="')[-1].split('"')[0]
                    elif "challenge?k=" in line:
                        recaptcha_key = line.split("challenge?k=")[-1].split('"')[0]
            if None not in (rand, referer, recaptcha_key):
                if self.wait_func(wait):  #wait... if true: download was stopped
                    return self.link, None, err_msg
                recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % recaptcha_key
                c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                challenge, response = c.solve_captcha()
                if response is not None:
                    dict_form = {
                        "op": "download2",
                        "id": file_id,
                        "rand": rand,
                        "referer": referer,
                        "method_free": " Regular Download ",
                        "method_premium": "",
                        "recaptcha_challenge_field": challenge,
                        "recaptcha_response_field": response,
                        "down_direct": "1"
                    }
                    with URLClose(URLOpen(cookie).open(self.link,
                            urllib.urlencode(dict_form), headers=headers)) as sb:
                        if self.wait_func():
                            return self.link, None, err_msg
                        for line in sb:
                            if 'class="atitle"' in line:
                                link_file = line.split('href="')[-1].split('"')[0]
                            elif "Wrong captcha" in line:
                                raise CaptchaException("Wrong captcha")
                    if link_file is not None:
                        with URLClose(URLOpen(cookie).open(link_file,
                                range=(self.content_range, None)),
                                always_close=False) as sc:
                            source = sc
                    else:  #link not found
                        raise LinkErrorException("Link Error")
                else:
                    raise CaptchaException("No response from the user")
            else:  #limit exceeded
                #TODO: Fix for big files (+1gb), since regular users can't download them
                raise LimitExceededException("Limit Exceeded")
        else:  #link not found
            raise LinkErrorException("Link Error")
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (LimitExceededException, CaptchaException, LinkErrorException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        err_msg = err
        logger.info(err)
    except Exception as err:
        err_msg = err
        logger.exception(err)
    return link_file, source, err_msg  #source may be the file object or None.
def add(self):
    """Fetch the download link; return (self.link, source, err_msg)."""
    try:
        link_file = None
        err_msg = None
        source = None
        res = ""
        pkr = ""
        cookie = cookielib.CookieJar()
        form = None
        max_retries = 3
        for retry in range(max_retries + 1):
            try:
                #First encrypted page.
                with URLClose(URLOpen(cookie).open(self.link, form)) as s:
                    if self.wait_func():
                        return self.link, None, err_msg
                    s_lines = s.readlines()
                    for line in s_lines:
                        #Get pKr
                        if "pKr='" in line:
                            pkr = line.split("'")[1].split("'")[0]
                        #Get the last block to unescape
                        if "unescape" in line:
                            tmp = line.split("break;}")[-1]
                            tmp = self.split_eval(tmp)
                            #Eval the block until it's plain text
                            res = self.decrypt(tmp)
                        #Recaptcha
                        if "challenge?k=" in line:
                            if retry < (max_retries + 1):
                                recaptcha_key = line.split("challenge?k=")[-1].split('"')[0]
                                recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % recaptcha_key
                                c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                                challenge, response = c.solve_captcha()
                                if response is not None:
                                    #Submit the input to the recaptcha system
                                    form = urllib.urlencode([
                                        ("recaptcha_challenge_field", challenge),
                                        ("recaptcha_response_field", response),
                                        ("downloadp", "")])
                                    raise PostCaptchaException("Post captcha solution")
                                else:
                                    raise CaptchaException("No response from the user")
                            else:
                                raise CaptchaException("Captcha, max retries reached")
                    #Name of the function containing the id referring to the div
                    #that holds the real link
                    id_func = res.split("(")[0]
                    pk1 = res.split("'")[3].split("'")[0]
                    qk = res.split("'")[1].split("'")[0]  #Public ID of the file
                    for line in s_lines:
                        #Line containing the function to parse
                        if id_func in line:
                            #Try to get the crypted block
                            try:
                                tmp = line.split(id_func)[1].split("setTimeout")[0].split('"none";')[1]
                                tmp = self.split_eval(tmp)
                            except Exception as err:
                                print line
                                raise
                            #Decrypt until it's plain text
                            res = self.decrypt(tmp)
                            div_id = res.split('getElementById("')[1].split('"')[0]
                            data = urllib.urlencode([("qk", qk), ("pk1", pk1), ("r", pkr), ])
                            form_action = "http://www.mediafire.com/dynamic/download.php?%s" % data
            except PostCaptchaException as err:
                pass
            else:
                break
        try:
            #Second encrypted page.
            with URLClose(URLOpen(cookie).open(form_action)) as s:
                if self.wait_func():
                    return self.link, None, err_msg
                s_lines = s.readlines()
                for line in s_lines:  #s_lines[1:]: we don't care about the first line
                    #print "NEW " + line
                    #Table with the real and fake dl vars.
                    if "function dz()" in line:
                        #Decrypt the table containing the final dl var
                        tmp = line.split("break;")[0].split("eval(")
                        for t in tmp:
                            if "unescape" in t:
                                t = t.replace("\\", "")
                                table = self.decrypt(t)
                    #Result is plain text (small files); not working.
                    if "http://download" in line:
                        #Get all the dl links (even the fake ones)
                        var = line.split('mediafire.com/" +')
                        #Get the number of the server
                        serv = line.split("http://download")[1].split(".")[0]  #error: may grab something else
                        #Get the name of the file
                        name = var[1].split('+')[1].split("/")[2].split('"')[0].strip("\\")
                        #Find the real link among the fake ones
                        it = iter(var)
                        for tmp in it:
                            #Real link
                            if div_id in tmp:
                                tmp = it.next()
                                tmp = tmp.split('+')[0]
                                #Get the final dl var in the table
                                dl = table.split(tmp + "=")[1].split(";")[0].strip("'")
                                raise FileLinkFoundException()
                    #Result is encrypted
                    else:
                        tmp = line.split("=''")[-1]
                        tmp = tmp.split("eval(")
                        #Decrypt until the real link is found
                        for t in tmp:
                            if "unescape" in t:
                                t = t.replace("\\", "")
                                t = t.split("=''")[-1]
                                res = self.decrypt(t, div_id)
                                if len(res) == 3:
                                    serv = res[0]
                                    var = res[1]
                                    name = res[2]
                                    raise FileLinkFoundException()
                #if we get here, the link was not found.
                raise LinkNotFoundException("Link not found")
        except FileLinkFoundException as err:
            pass
        dl = table.split(var + "=")[1].split(";")[0].strip("'")
        link_file = "http://%s/%sg/%s/%s" % (serv, dl, qk, name)
        with URLClose(URLOpen(cookie).open(link_file,
                range=(self.content_range, None)), always_close=False) as s:
            source = s
            print link_file
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        print err
        err_msg = err
    except (CaptchaException, LinkNotFoundException) as err:
        print err
        err_msg = err
        logging.exception(err)
    except Exception as err:
        err_msg = err
        print err
        logging.exception(err)
    return self.link, source, err_msg  #source may be the file object or None.
def add(self):
    #wait_func: wait method from thread_managed
    """
    TODO: Refactor.
    """
    link_file = None
    err_msg = None
    source = None
    wait = WAITING
    ajax_id = None
    recaptcha_key = None
    try:
        file_id = self.link.split("/files/")[1].split("/")[0]
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)  #cookielib
        ajax_id_url = BASE_URL + "/files-ajax/" + file_id + "/request.html"
        with URLClose(opener.open(self.link)) as s1:
            for line in s1:
                if "var ajaxdl" in line:
                    ajax_id = line.split('"')[1]
                elif "challenge?k=" in line:
                    recaptcha_key = line.split("challenge?k=")[-1].split('"')[0].strip()
        if not ajax_id:  #not recaptcha_key or not ajax_id:
            raise LinkErrorException("Link not found.")
        if self.wait_func():
            return self.link, None, err_msg
        #wait time.
        #note: bitshare does not care about this. It can be skipped.
        #headers = {"Accept": "application/json", }
        form = urllib.urlencode([("request", "generateID"), ("ajaxid", ajax_id)])
        with URLClose(opener.open(ajax_id_url, form)) as s2:
            response = s2.read()  #may return ERROR: explanation
        wait = int(response.split(":")[1])  #file:60:1
        if wait > 120:
            raise LimitExceededException("Limit Exceeded")
        if self.wait_func(wait):
            return self.link, None, err_msg
        #recaptcha.
        #note: bitshare does not care about this. It can be skipped.
        if recaptcha_key:
            recaptcha_link = "http://www.google.com/recaptcha/api/challenge?k=%s" % recaptcha_key
            for retry in range(3):
                c = Recaptcha(BASE_URL, recaptcha_link, self.wait_func)
                challenge, response = c.solve_captcha()
                if response is not None:
                    form = urllib.urlencode([
                        ("request", "validateCaptcha"),
                        ("ajaxid", ajax_id),
                        ("recaptcha_challenge_field", challenge),
                        ("recaptcha_response_field", response)])
                    #may return ERROR: explanation or SUCCESS
                    response_ = opener.open(ajax_id_url, form).read()
                    if not "ERROR" in response_:
                        break
                else:
                    raise CaptchaException("No response from the user")
            if "ERROR" in response_:
                raise CaptchaException("Wrong captcha")
        if self.wait_func():
            return self.link, None, err_msg
        #get download link
        form = urllib.urlencode([("request", "getDownloadURL"), ("ajaxid", ajax_id)])
        with URLClose(opener.open(ajax_id_url, form)) as s3:
            response = s3.read()
        link_file = response.split("http")[-1]
        link_file = "http" + link_file
        with URLClose(URLOpen(cookie).open(link_file,
                range=(self.content_range, None)), always_close=False) as sc:
            source = sc
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (LimitExceededException, CaptchaException, LinkErrorException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        err_msg = err
        logger.info(err)
    except Exception as err:
        err_msg = err
        logger.exception(err)
    return link_file, source, err_msg