示例#1
0
def get_link_to_pic_slideshow():
    """Return the absolute URL behind the first interactive photo box.

    The page located by ``get_url_to_pics()`` is downloaded with
    ``simple_get`` and parsed with BeautifulSoup. The ``href`` of the anchor
    inside the first ``div`` carrying the ``photo-box--interactive`` class is
    prefixed with the Yelp host and returned as a string.
    """
    page_source = simple_get(get_url_to_pics())
    soup = BeautifulSoup(page_source, 'html.parser')

    # Only the first matching photo box is consulted.
    first_box = soup.find("div", class_="photo-box--interactive")
    relative_href = first_box.a["href"]

    # The href is appended to the host verbatim
    # (presumably it is site-relative, e.g. "/biz_photos/..." - TODO confirm).
    return 'https://www.yelp.com' + relative_href
示例#2
0
def get_total_pic_pages():
    """Return the integer read from the pictures page's pager element.

    The text of the first ``div`` with class ``page-of-pages`` is split on
    whitespace and its fourth token is converted to ``int``.
    """
    # HACK: depends on where the number sits inside the pager text; it may be
    # worth checking how the page (JavaScript or otherwise) generates the
    # page numbers instead.
    soup = BeautifulSoup(simple_get(get_url_to_pics()), 'html.parser')

    pager_text = soup.find("div", class_="page-of-pages").text.strip()
    tokens = pager_text.split()

    # presumably the text reads like "Page 1 of N" - TODO confirm
    return int(tokens[3])
示例#3
0
def get_pictures():
    """Return a list with the image ``src`` of every interactive photo box.

    The page located by ``get_url_to_pics()`` is fetched and parsed, and for
    each ``div`` with class ``photo-box--interactive`` the ``src`` attribute
    of its first ``img`` descendant is collected, in document order.
    """
    soup = BeautifulSoup(simple_get(get_url_to_pics()), 'html.parser')
    photo_boxes = soup.find_all("div", class_="photo-box--interactive")

    # NOTE: alternatives the original author considered were selecting
    # "img" tags with class "photo-box-img" directly, or the "li" elements
    # marked with 'data-photo-id'.
    return [box.img["src"] for box in photo_boxes]
示例#4
0
from bs4 import BeautifulSoup
from parser import simple_get

# raw_html = open('test.html').read()

# Fetch one hard-coded Yelp business page as soon as this module runs.
raw_html = simple_get('https://www.yelp.com/biz/jacks-prime-san-mateo-4?osq=burger')
# Progress marker: printed once simple_get has returned.
print("done")
# Parse the response with the standard-library-backed 'html.parser' backend.
html = BeautifulSoup(raw_html, 'html.parser')

class Business:
    """Placeholder type for a business; it defines no attributes or methods yet."""



    















示例#5
0
def get_total_pics():
    """Return the integer shown in the pictures page's tab counter.

    The page located by ``get_url_to_pics()`` is fetched and parsed, and the
    text of the first ``span`` with class ``tab-link_count`` is read.
    Surrounding whitespace and enclosing parentheses are removed before the
    remainder is converted with ``int``.

    Raises:
        ValueError: if the remaining text is not a valid integer literal.
    """
    soup = BeautifulSoup(simple_get(get_url_to_pics()), 'html.parser')
    counter_text = soup.find("span", class_="tab-link_count").text

    # Trim whitespace before the parentheses: str.strip('()') on its own
    # stops at the first non-parenthesis character, so text padded with
    # spaces or newlines (e.g. " (12)\n") would keep its parentheses and
    # make int() fail.
    digits = counter_text.strip().strip('()')
    return int(digits)