def dl_videos(link):
    """Scrape a listing page and download every video (all episodes) it links to.

    Each 'boxim' entry on the page yields an anchor whose title encodes the
    product id (4th whitespace-separated token). Every episode is saved as
    "<product_id>_<ep>.mp4"; files already on disk are skipped.

    Silently returns when the listing page cannot be fetched.
    """
    content = climber.get_content(link)
    if content is None:
        return
    soup = BeautifulSoup(content)
    boxes = soup.findAll('div', {'class': 'boxim'})
    if boxes is None:
        return
    links = []
    product_ids = []
    for sec in boxes:
        a_link_sec = sec.find('a')
        if a_link_sec is None:
            continue
        a_link = a_link_sec['href']
        a_title = a_link_sec['title']
        # Product id is expected as the 4th whitespace-separated token of the
        # anchor title; skip entries whose title is too short to hold it
        # (the original indexed blindly and could raise IndexError).
        title_parts = a_title.split(' ')
        if len(title_parts) < 4:
            continue
        product_id = title_parts[3]
        print("[{}] {}".format(product_id, a_link))
        links.append(a_link)
        product_ids.append(product_id)
    for idx, each_link in enumerate(links):
        content1 = climber.get_content(each_link)
        if content1 is None:
            continue
        soup1 = BeautifulSoup(content1)
        ep_num = get_ep_num(soup1)
        # Episode 1 lives at the plain link; later episodes use "?ep=<n>".
        # Episode 1 was downloaded unconditionally in the original, so the
        # unified loop always covers at least episode 1.
        for ep in range(1, max(ep_num, 1) + 1):
            if ep == 1:
                ep_link = each_link
            else:
                ep_link = "{}?ep={}".format(each_link, ep)
            vid_link = get_final_video_link(ep_link)
            if not vid_link:  # covers both '' and None
                continue
            to_path = "{}_{}.mp4".format(product_ids[idx], ep)
            _download_if_missing(vid_link, to_path)


def _download_if_missing(vid_link, to_path):
    """Download vid_link to to_path unless the file already exists on disk."""
    print("Download {} ...".format(to_path))
    if os.path.exists(to_path):
        print("{} exists, skip".format(to_path))
    else:
        dl.download_url(vid_link, to_path)
def get_final_video_link(link):
    """Resolve a video page URL to the direct video source link.

    Fetches the page, follows its embedded iframe to the hosting page on
    porn609.com, and extracts the <source> link from there.

    Returns the final link string, or None when any fetch/parse step fails
    (callers treat '' and None as failure).
    """
    content = climber.get_content(link)
    if content is None:
        return None
    soup = BeautifulSoup(content)
    if soup is None:
        return None
    iframe_vlink = "http://www.porn609.com/{}".format(get_iframe_video_link(soup))
    video_page = climber.get_content(iframe_vlink)
    if video_page is None:
        return None
    soup2 = BeautifulSoup(video_page)
    if soup2 is None:
        return None
    return get_video_page_source_link(soup2)
def scan(link):
    """Walk every userposts page and print an HTML snippet per video comment.

    For each comment table found, prints the thumbnail <img> tag followed by
    an <a> tag pointing at the rapidgator link extracted from the comment's
    hidden textarea (BBCode "[url=<link>][...]" shape).

    Silently returns when the first page cannot be fetched or parsed.
    """
    content = climber.get_content(link)
    if content is None:
        return
    soup = BeautifulSoup(content)
    if soup is None:
        return
    max_page_num = get_max_page_num(soup)
    for page_num in range(1, max_page_num + 1):
        page_link = "http://www.javlibrary.com/tw/userposts.php?mode=&u=javmember&page={}".format(page_num)
        tmp_content = climber.get_content(page_link)
        if tmp_content is None:
            continue
        tmp_soup = BeautifulSoup(tmp_content)
        if tmp_soup is None:
            continue
        video_comments = tmp_soup.findAll('table', {'class': 'comment'})
        if video_comments is None:
            continue
        for comment in video_comments:
            first_img_link = comment.find('img', {'style': 'float:left'})
            if first_img_link is None:
                continue
            img_link = first_img_link['src']
            print('<img src="{}"><br>'.format(img_link))
            # find title
            strong_sec = comment.find('strong')
            if strong_sec is None:
                continue
            title_anchor = strong_sec.find('a')
            if title_anchor is None:
                # was unguarded in the original: find() may return None,
                # which would crash on .contents below
                continue
            title = title_anchor.contents[0]
            text_sec = comment.find('textarea', {'class': 'hidden'})
            if text_sec is None:
                continue
            # Comment body is expected to look like "[url=<link>][...]";
            # skip comments that don't match that shape instead of crashing.
            try:
                rapidgator_link = text_sec.contents[0].split('][')[0].split('url=')[1]
            except IndexError:
                continue
            print('<a href="{}">{}/</a><br><br>'.format(rapidgator_link, title))