Пример #1
0
def dl_videos(link):
    """Scrape a listing page for video boxes and download every episode of
    each video to <product_id>_<ep>.mp4, skipping files that already exist.

    link -- URL of the listing page to scrape.
    Returns None; the downloads are the side effect.
    """
    content = climber.get_content(link)
    if content is None:
        return
    soup = BeautifulSoup(content)
    # findAll returns an empty list (never None) when nothing matches,
    # so test truthiness rather than comparing with None.
    boxes = soup.findAll('div', {'class': 'boxim'})
    if not boxes:
        return

    links = []
    product_ids = []
    for sec in boxes:
        a_link_sec = sec.find('a')
        if a_link_sec is None:
            continue
        a_link = a_link_sec['href']
        a_title = a_link_sec['title']
        # Product id is the 4th whitespace-separated token of the title
        # (assumes the site's title format is stable -- TODO confirm).
        product_id = a_title.split(' ')[3]
        print("[{}] {}".format(product_id, a_link))
        links.append(a_link)
        product_ids.append(product_id)

    # links and product_ids were appended in lockstep, so zip them
    # instead of indexing via enumerate.
    for each_link, product_id in zip(links, product_ids):
        content1 = climber.get_content(each_link)
        if content1 is None:
            continue
        soup1 = BeautifulSoup(content1)
        ep_num = get_ep_num(soup1)  # total episode count for this video

        vid_link = get_final_video_link(each_link)
        if not vid_link:  # covers both None and ''
            continue
        # Episode 1 is the bare link (no ?ep= query parameter).
        first_file_path = "{}_1.mp4".format(product_id)
        print("Download {} ...".format(first_file_path))
        if os.path.exists(first_file_path):
            print("{} exists, skip".format(first_file_path))
        else:
            dl.download_url(vid_link, first_file_path)

        # Remaining episodes are addressed with ?ep=<n>.
        for i in range(2, ep_num + 1):
            new_link = "{}?ep={}".format(each_link, i)
            vid_link1 = get_final_video_link(new_link)
            if not vid_link1:
                continue
            to_path = "{}_{}.mp4".format(product_id, i)
            print("Download {} ...".format(to_path))
            if os.path.exists(to_path):
                print("{} exists, skip".format(to_path))
            else:
                dl.download_url(vid_link1, to_path)
Пример #2
0
def get_final_video_link(link):
    """Resolve a video page URL to the direct video source URL.

    Fetches the page, follows its embedded iframe to the player page and
    extracts the source link there.

    link -- URL of the video page.
    Returns the final video URL, or None when any fetch step fails.
    """
    content = climber.get_content(link)
    if content is None:
        return None
    # NOTE: BeautifulSoup(...) never returns None, so no None-check is
    # needed on the parsed soup (the original's check was unreachable).
    soup = BeautifulSoup(content)

    # Build the absolute URL of the iframe player page.
    iframe_vlink = "http://www.porn609.com/{}".format(get_iframe_video_link(soup))
    video_page = climber.get_content(iframe_vlink)
    if video_page is None:
        return None
    soup2 = BeautifulSoup(video_page)
    return get_video_page_source_link(soup2)
Пример #3
0
def scan(link):
    """Walk every page of the javmember user-post listing and print, as HTML,
    the thumbnail, title and rapidgator link of each commented video.

    link -- URL of the first listing page (used only to discover the total
            page count).
    Returns None; printing is the side effect.
    """
    content = climber.get_content(link)
    if content is None:
        return
    # BeautifulSoup(...) never returns None, so no None-check is needed
    # on the parsed soup (the original's checks were unreachable).
    soup = BeautifulSoup(content)
    max_page_num = get_max_page_num(soup)

    for page_num in range(1, max_page_num + 1):
        page_link = "http://www.javlibrary.com/tw/userposts.php?mode=&u=javmember&page={}".format(page_num)
        tmp_content = climber.get_content(page_link)
        if tmp_content is None:
            continue
        tmp_soup = BeautifulSoup(tmp_content)

        # findAll returns [] (never None) when nothing matches; an empty
        # list simply skips the loop below.
        video_comments = tmp_soup.findAll('table', {'class': 'comment'})
        for comment in video_comments:
            first_img_link = comment.find('img', {'style': 'float:left'})
            if first_img_link is None:
                continue
            img_link = first_img_link['src']
            print('<img src="{}"><br>'.format(img_link))

            ''' find title '''
            strong_sec = comment.find('strong')
            if strong_sec is None:
                continue
            title_anchor = strong_sec.find('a')
            # Guard the nested find like every other lookup in this loop;
            # the original raised AttributeError when <strong> had no <a>.
            if title_anchor is None:
                continue
            title = title_anchor.contents[0]

            text_sec = comment.find('textarea', {'class': 'hidden'})
            if text_sec is None:
                continue
            # Comment text looks like "[url=<link>][...]" -- pull out <link>.
            # Assumes that exact format; an unexpected comment would raise
            # IndexError here -- TODO confirm against live data.
            rapidgator_link = text_sec.contents[0].split('][')[0].split('url=')[1]
            print('<a href="{}">{}/</a><br><br>'.format(rapidgator_link, title))