Example #1
        print("Search term %s failed, moving on to next search term\n***" %
              str(idx + 1))
        continue

    # Check to see if any results were returned from the search.
    # If there were none, move on to the next search.
    if zl.test_for_no_results(driver):
        print(
            "Search %s returned zero results. Moving on to next search\n***" %
            str(term))
        continue

    # Pull the HTML for each page of search results. Zillow caps results at
    # 20 pages, and each page can contain up to 26 home listings, so the cap
    # on home listings per search is 520 (20 * 26).
    raw_data = zl.get_html(driver)
    print("%s pages of listings found" % str(len(raw_data)))

    # Take the extracted HTML and split it up by individual home listings.
    listings = zl.get_listings(raw_data)
    print("%s home listings scraped\n***" % str(len(listings)))

    # For each home listing, extract the 11 variables that will populate that
    # specific observation within the output dataframe.
    for home in listings:
        soup = BeautifulSoup(home, "lxml")
        new_obs = []

        # List containing the number of beds, baths, and total sqft (and
        # sometimes the price as well).
        card_info = zl.get_card_info(soup)
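
For reference, zl.get_card_info is not shown in either excerpt. A minimal sketch of what such a helper might look like, assuming the listing card exposes its facts in a span whose class is something like "zsg-photo-card-info" (the selector and the "NA" fallback are illustrative assumptions, not confirmed by the excerpt):

from bs4 import BeautifulSoup

def get_card_info(soup):
    # Hypothetical helper: pull the beds/baths/sqft summary text from a
    # single listing card. The class name below is an assumption for
    # illustration; inspect the live page markup for the real selector.
    card = soup.find("span", {"class": "zsg-photo-card-info"})
    if card is None:
        return "NA"
    # Facts are typically separated by a middle-dot character, e.g.
    # "3 bds · 2 ba · 1,500 sqft"; split them into a list of strings.
    return [part.strip() for part in card.get_text().split("\u00b7")]
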
Example #2
    else:
        print("Search term %s failed, moving on to next search term\n***" % 
              str(idx + 1))
        continue

    # Check to see if any results were returned from the search.
    # If there were none, move on to the next search.
    if zl.test_for_no_results(driver):
        print("Search %s returned zero results. Moving on to next search\n***" %
              str(term))
        continue

    # Pull the HTML for each page of search results. Zillow caps results at
    # 20 pages, and each page can contain up to 26 home listings, so the cap
    # on home listings per search is 520 (20 * 26).
    raw_data = zl.get_html(driver)
    print("%s pages of listings found" % str(len(raw_data)))

    # Take the extracted HTML and split it up by individual home listings.
    listings = zl.get_listings(raw_data)
    print("%s home listings scraped\n***" % str(len(listings)))

    # For each home listing, extract the 11 variables that will populate that 
    # specific observation within the output dataframe.
    for home in listings:
        new_obs = []
        parser = zl.html_parser(home)

        # Street Address
        new_obs.append(parser.get_street_address())
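
Example #2 swaps the free functions of Example #1 for a parser object: the listing HTML is parsed once in the constructor, and each get_* method queries the same tree. The html_parser class itself is outside this excerpt; a minimal sketch of the interface the loop relies on, with the address selector being a placeholder assumption:

from bs4 import BeautifulSoup

class html_parser:
    # Hypothetical reconstruction of the wrapper used in Example #2.
    def __init__(self, html):
        # Parse the single listing's HTML once, up front.
        self.soup = BeautifulSoup(html, "lxml")

    def get_street_address(self):
        # The class name below is an assumption for illustration only.
        tag = self.soup.find("span", {"class": "zsg-photo-card-address"})
        return tag.get_text().strip() if tag else "NA"

Parsing once per home rather than once per variable matters here: with up to 520 listings per search, the object-based version avoids re-parsing the same HTML for each of the 11 extracted fields.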