def parse(self):
    """
    Walk the netload.in free-download flow and store the result in self.source.

    The method fetches self.link, follows the "Free_dl" link, waits for the
    countdown, solves the captcha image, posts the captcha form and finally
    hands the "Orange_Link" pattern to self.click(); whatever click() returns
    is stored in self.source.

    NOTE(review): the third argument of self.get_match() looks like the error
    message used when the pattern is missing, and the 600/30 arguments of
    self.countdown() look like a limit and a default wait in seconds -
    confirm against the base class.
    """
    # The file id is either a query parameter or the path segment that
    # follows "netload.in/datei".
    if "file_id" in self.link:  # most likely not.
        file_id = self.link.split("file_id=")[-1].split("&")[0]
    else:
        file_id = self.link.split("netload.in/datei")[-1].split("/")[0].split(".")[0]

    page = self.get_page(self.link)

    m_pattern = r'Free_dl"><a href="(?P<link>[^"]+)'
    m = self.get_match(m_pattern, page, "Download not found")
    # The href is scraped from HTML, so "&" arrives escaped as "&amp;" and
    # has to be decoded before the value is used as a URL.
    link = BASE_URL + "/" + m.group('link').replace("&amp;", "&")
    page = self.get_page(link)

    cn_pattern = r'>countdown\((?P<count>[^,]+)'
    self.countdown(cn_pattern, page, 600, 30)  # this pattern may not work

    m_pattern = r'src="(?P<link>[^"]+)"[^"]+"Sicherheitsbild'  # captcha
    m = self.get_match(m_pattern, page, "Captcha not found")
    link = BASE_URL + "/" + m.group('link')
    captcha_result = tesseract.get_solved_captcha(link, self.cookie, self.filter)

    form = [
        ("file_id", file_id),
        ("captcha_check", captcha_result),
        ("start", ""),
    ]
    captcha_form_url = BASE_URL + "/" + "index.php?id=10"
    page = self.get_page(captcha_form_url, form=form)
    # The page returned after the captcha carries a second countdown.
    self.countdown(cn_pattern, page, 600, 30)

    s_pattern = r'class="Orange_Link" href="(?P<link>[^"]+)'
    self.source = self.click(s_pattern, page, False)
def parse(self):
    """
    Walk the netload.in free-download flow and store the result in self.source.

    The method fetches self.link, follows the "Free_dl" link, waits for the
    countdown, solves the captcha image, posts the captcha form and finally
    hands the "Orange_Link" pattern to self.click(); whatever click() returns
    is stored in self.source.

    If the download link or the captcha image cannot be matched
    (self.get_match() returns None) the method returns without touching
    self.source.

    NOTE(review): the 600/30 arguments of self.countdown() look like a limit
    and a default wait in seconds - confirm against the base class.
    """
    # The file id is either a query parameter or the path segment that
    # follows "netload.in/datei".
    if "file_id" in self.link:  # most likely not.
        file_id = self.link.split("file_id=")[-1].split("&")[0]
    else:
        file_id = self.link.split("netload.in/datei")[-1].split("/")[0].split(".")[0]

    page = self.get_page(self.link)

    m_pattern = r'Free_dl"><a href="(?P<link>[^"]+)'
    m = self.get_match(m_pattern, page)
    if m is None:
        # dl not found
        return

    # The href is scraped from HTML, so "&" arrives escaped as "&amp;" and
    # has to be decoded before the value is used as a URL.
    link = BASE_URL + "/" + m.group('link').replace("&amp;", "&")
    page = self.get_page(link)

    cn_pattern = r'>countdown\((?P<count>[^,]+)'
    self.countdown(cn_pattern, page, 600, 30)  # this pattern may not work

    m_pattern = r'src="(?P<link>[^"]+)"[^"]+"Sicherheitsbild'  # captcha
    m = self.get_match(m_pattern, page)
    if m is None:
        # captcha not found
        return

    link = BASE_URL + "/" + m.group('link')
    captcha_result = tesseract.get_solved_captcha(link, self.cookie, self.filter)

    form = [
        ("file_id", file_id),
        ("captcha_check", captcha_result),
        ("start", ""),
    ]
    captcha_form_url = BASE_URL + "/" + "index.php?id=10"
    page = self.get_page(captcha_form_url, form=form)
    # The page returned after the captcha carries a second countdown.
    self.countdown(cn_pattern, page, 600, 30)

    s_pattern = r'class="Orange_Link" href="(?P<link>[^"]+)'
    self.source = self.click(s_pattern, page, False)
def add(self):
    """
    Resolve self.link on netload.in and open the final file URL.

    Flow: build the canonical "index.php?id=10&file_id=..." URL (self.link is
    overwritten with it), scan that page for the "Free_dl" link, scan the
    linked page for the captcha image and countdown, solve and post the
    captcha, scan the answer for a second countdown and the "Orange_Link"
    file URL, then open that URL starting at self.content_range.

    Returns a (link_file, source, err_msg) tuple:
      * link_file - the file URL taken from the "Orange_Link" line, or None.
      * source    - the object returned by opener.open() for the file, or None.
      * err_msg   - the exception that ended the flow. On success this is the
                    FileLinkFoundException instance, because that exception is
                    raised deliberately to reach the shared handler.
    When self.wait_func() returns a true value the method returns
    (self.link, None, None) immediately.
    NOTE(review): wait_func presumably sleeps and reports whether the download
    was stopped - confirm against its definition.

    Errors are never propagated: network errors, the project exceptions and
    any other Exception are caught and returned as err_msg. A
    LimitExceededException additionally calls self.set_limit_exceeded(True).

    TODO: Refactory.
    """
    link_file = None
    err_msg = None
    source = None
    wait = WAITING

    def countdown_wait(line):
        # Read the first argument of ">countdown(<n>, ..." and scale it down.
        # NOTE(review): the original note says "ms", but dividing by 100
        # suggests hundredths of a second - confirm the unit.
        try:
            return int(line.split(">countdown(")[-1].split(",")[0]) / 100
        except ValueError as err:
            logger.exception(err)
            return WAITING

    try:
        cookie = cookielib.CookieJar()
        opener = URLOpen(cookie)  # cookielib

        # url parse
        if "file_id" in self.link:  # most likely not.
            file_id = self.link.split("file_id=")[-1].split("&")[0]
        else:
            file_id = self.link.split("netload.in/datei")[-1].split("/")[0].split(".")[0]
        self.link = BASE_URL + "/" + "index.php?id=10&file_id=" + file_id

        # Step 1: find the free-download link and the form id it carries.
        form_id = None
        url = None
        with URLClose(opener.open(self.link)) as s1:
            if self.wait_func():
                return self.link, None, err_msg
            for line in s1:
                if 'class="Free_dl' in line:
                    form_id = line.split("?id=")[-1].split("&")[0]
                    # The href is scraped from HTML, so "&" arrives escaped
                    # as "&amp;" and must be decoded before use as a URL.
                    url = BASE_URL + "/" + line.split('href="')[-1].split(
                        '"')[0].replace("&amp;", "&")
                    break
        if url is None:
            # Without this check the next open() would fail on an unbound
            # name and be reported as an unexpected error.
            raise LinkErrorException("Link not found")

        # Step 2: find the captcha image and the first countdown.
        captcha_url = None
        with URLClose(opener.open(url)) as s2:
            for line in s2:
                if "captcha.php" in line:
                    captcha_url = BASE_URL + "/" + line.split(
                        'src="')[-1].split('"')[0]
                elif ">countdown(" in line:
                    wait = countdown_wait(line)
        if captcha_url is None:
            raise CaptchaException("Captcha not found")

        if self.wait_func(wait + 1):
            return self.link, None, err_msg

        # Step 3: solve the captcha and post the form.
        captcha_result = tesseract.get_solved_captcha(
            captcha_url, cookie, self.filter)
        form = urllib.urlencode([
            ("file_id", file_id),
            ("captcha_check", captcha_result),
            ("start", ""),
        ])
        captcha_form_url = BASE_URL + "/" + "index.php?id=" + form_id
        with URLClose(opener.open(captcha_form_url, form)) as s3:
            for line in s3:
                if ">countdown(" in line:
                    wait = countdown_wait(line)
                elif 'class="Orange_Link' in line:
                    link_file = line.split('href="')[-1].split('"')[0]

        if wait > 600:  # 10 minutes
            raise LimitExceededException("Limit exceeded")
        if link_file is None:
            # Avoid handing None to opener.open() below.
            raise LinkErrorException("Link not found")
        if self.wait_func(wait + 1):
            return self.link, None, err_msg

        # Step 4: open the file itself. always_close=False is passed so the
        # response can be handed back to the caller as `source`.
        with URLClose(opener.open(link_file,
                                  range=(self.content_range, None)),
                      always_close=False) as s4:
            source = s4
        # Success is signalled through the exception handler below.
        raise FileLinkFoundException()
    except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
        err_msg = err
    except (FileLinkFoundException, LimitExceededException,
            LinkErrorException, CaptchaException) as err:
        if isinstance(err, LimitExceededException):
            self.set_limit_exceeded(True)
        err_msg = err
        logger.info(err)
    except Exception as err:
        logger.exception(err)
        err_msg = err

    return link_file, source, err_msg