Пример #1
0
def scrape2():
    """Parse the HTML file named by ``file_1`` and print it pretty-printed.

    The file is opened in binary mode so that BeautifulSoup, rather than
    Python's text layer, decides how to decode the document (e.g. UTF-8).

    NOTE(review): ``file_1`` and ``BeautifulSoup`` are expected to be
    provided at module level; neither is defined inside this function.
    """
    # The context manager guarantees the handle is closed even if parsing
    # raises; BeautifulSoup consumes the whole file inside the constructor.
    with open(file_1, 'rb') as f:
        soup = BeautifulSoup(f, "html.parser")

    # The method is spelled ``prettify``. A misspelled attribute is treated by
    # BeautifulSoup as a tag lookup and yields None, which is not callable.
    html = soup.prettify()
    print(html)
Пример #2
0
def scrape():
    """Parse the HTML file named by ``file_1`` and print it pretty-printed.

    The file is opened in binary mode so that BeautifulSoup, rather than
    Python's text layer, decides how to decode the document (e.g. UTF-8).

    Failures are reported on stdout instead of being raised: an ``IOError``
    prints ``"Error <details>"`` and any other ``Exception`` prints
    ``"Unexpected error "``. In both cases the function returns ``None``
    without printing any markup.

    NOTE(review): ``file_1`` and ``BeautifulSoup`` are expected to be
    provided at module level; neither is defined inside this function.
    """
    try:
        # ``with`` closes the handle only if it was actually opened, so a
        # failing open() no longer triggers a second error during cleanup.
        with open(file_1, 'rb') as f:
            soup = BeautifulSoup(f, "html.parser")
            # ``prettify`` is the real method name; a misspelling resolves to
            # a tag lookup that returns None and cannot be called.
            html = soup.prettify()

    except IOError as e:
        print("Error %s" % e)
        return

    # Catch Exception rather than everything, so KeyboardInterrupt and
    # SystemExit still propagate to the caller.
    except Exception:
        print("Unexpected error ")
        return

    print(html)
Пример #3
0
from bs4 import BeautifulSoup
import requests
import csv
# Download the page and keep the response body as decoded text.
source = requests.get(
    'https://www.ncr.com/company/executive-leadership-team').text

# NOTE(review): neither `urllib2` nor `quote_page` is defined in the visible
# part of this file, and `page` is not used below - confirm whether this line
# is a leftover from another snippet before relying on it.
page = urllib2.urlopen(quote_page)

# Parse the downloaded markup with the lxml parser.
soup = BeautifulSoup(source, 'lxml')
# The method is spelled ``prettify``; a misspelled attribute is treated by
# BeautifulSoup as a tag lookup and yields None, which is not callable.
print(soup.prettify())

# Create a browser *instance*; the options are keyword arguments of the
# constructor, not separate module-level assignments.
# NOTE(review): `mechanicalsoup` is not imported and `url` is not defined in
# the visible part of this file - confirm both are provided elsewhere.
browser = mechanicalsoup.StatefulBrowser(
    soup_config={'features': 'lxml'},  # Use the lxml HTML parser
    raise_on_404=True,
    user_agent='MyBot/0.1: mysite.example.com/bot_info',
)

browser.open(url)
# ...
browser.close()
# Parse a local HTML document with the lxml parser.
with open('simple.HTML') as HTML_file:
    soup = BeautifulSoup(HTML_file, 'lxml')

# Look up the first <div> carrying the designation CSS class and show it;
# find() returns None when no such element exists.
vp_name = soup.find(
    'div',
    attrs={
        'class':
        'leadership-team_wrapper_cntr_leaders-grid_leader_info_designation'
    })
print(vp_name)

import csv
from datetime import datetime

# Open in append mode: a csv writer needs a writable file, and appending
# keeps any rows that are already stored in index.csv.
# NOTE(review): on Python 3 the csv docs recommend also passing newline=''
# to open(); confirm the target Python version before adding it.
with open('index.csv', 'a') as csv_file:
    # csv.writer expects an object with a write() method, i.e. the open file
    # handle - not the csv module itself.
    writer = csv.writer(csv_file)