def get_single_item_data(item_url):
    """Fetch a single item page and print its item name(s) and all links.

    Side effects only (prints to stdout); returns None.

    :param item_url: absolute URL of the item page to scrape.
    """
    source_code = requests.get(item_url)
    plain_text = source_code.text
    # Name the parser explicitly: omitting it raises bs4's
    # GuessedAtParserWarning and makes output depend on which parser
    # happens to be installed.
    soup = BeautifulSoup(plain_text, "html.parser")
    # find_all is the current API name; findAll is a legacy alias.
    for item_name in soup.find_all('div', {'class': 'i-name'}):
        print(item_name.string)
    for link in soup.find_all('a'):
        # NOTE(review): assumes every href is site-relative; an already
        # absolute href would be mangled by this concatenation — confirm.
        href = "https://buckysroom.org" + link.get('href')
        print(href)
# Example #2
def get_single_item_data(item_url):
    """Fetch a single item page and print its item name(s) and all links.

    Side effects only (prints to stdout); returns None.

    :param item_url: absolute URL of the item page to scrape.
    """
    source_code = requests.get(item_url)
    plain_text = source_code.text
    # Explicit parser avoids bs4's GuessedAtParserWarning and keeps
    # results consistent across machines.
    soup = BeautifulSoup(plain_text, "html.parser")
    # find_all is the current API name; findAll is a legacy alias.
    for item_name in soup.find_all('div', {'class': 'i-name'}):
        print(item_name.string)
    for link in soup.find_all('a'):
        # NOTE(review): assumes hrefs are site-relative — confirm.
        href = "https://buckysroom.org" + link.get('href')
        print(href)
def trade_spider(max_pages):
    """Crawl search result pages 1..max_pages and print each item's link and title.

    Side effects only (prints to stdout); returns None.

    :param max_pages: last page number to crawl (inclusive).
    """
    page = 1
    while page <= max_pages:
        url = 'https://buckysroom.org/trade/search.php?page=' + str(page)
        source_code = requests.get(url)
        plain_text = source_code.text
        # Explicit parser avoids bs4's GuessedAtParserWarning.
        soup = BeautifulSoup(plain_text, "html.parser")
        for link in soup.find_all('a', {'class': 'item-name'}):
            # NOTE(review): assumes hrefs are site-relative — confirm.
            href = "https://buckysroom.org" + link.get('href')
            title = link.string
            print(href)
            print(title)
        page += 1
# Example #4
def trade_spider(max_pages):
    """Crawl listing pages 1..max_pages and scrape every linked item page.

    For each a.item-name anchor found, builds the absolute item URL and
    hands it to get_single_item_data(). Side effects only; returns None.

    :param max_pages: last page number to crawl (inclusive).
    """
    page = 1
    while page <= max_pages:
        url = 'DESIRED WEBSITE' + str(page)
        source_code = requests.get(url)
        plain_text = source_code.text
        # Explicit parser avoids bs4's GuessedAtParserWarning.
        soup = BeautifulSoup(plain_text, "html.parser")
        for link in soup.find_all('a', {'class': 'item-name'}):
            # (removed unused local `title`; the original never read it)
            href = "DESIRED WEBSITE" + link.get('href')
            get_single_item_data(href)
        page += 1
# Example #5
def trade_spider(max_pages):
    """Crawl search result pages 1..max_pages and print each item's link and title.

    Side effects only (prints to stdout); returns None.

    :param max_pages: last page number to crawl (inclusive).
    """
    page = 1
    # BUG FIX: the original tested `page <= max_page` (singular), an
    # undefined name — calling it raised NameError immediately.
    while page <= max_pages:
        url = 'https://buckysroom.org/trade/search.php?page=' + str(page)
        source_code = requests.get(url)
        plain_text = source_code.text
        # Explicit parser avoids bs4's GuessedAtParserWarning.
        soup = BeautifulSoup(plain_text, "html.parser")
        for link in soup.find_all('a', {'class': 'item-name'}):
            href = "https://buckysroom.org" + link.get('href')
            title = link.string
            print(href)
            print(title)
        page += 1
def get_composer(number):
    """Print `number` composer names drawn from Wikipedia's era timelines.

    Downloads the "List of classical music composers by era" page, collects
    the title attribute of every <area> element inside each timeline
    wrapper, then samples `number` names with replacement and prints them.

    :param number: how many composer names to print.
    """
    response = requests.get(
        "https://en.wikipedia.org/wiki/List_of_classical_music_composers_by_era"
    )
    if not response.ok:
        print("Failed to download list of composers")
        raise SystemExit(1)

    soup = BeautifulSoup(response.text, "html.parser")
    # Flatten every timeline's image-map areas into one list of names.
    names = [
        area['title']
        for wrapper in soup.find_all("div", class_="timeline-wrapper")
        for area in wrapper.find_all("area")
    ]

    for name in random.choices(names, k=number):
        print(name)
# Example #7
# -*- coding: utf-8 -*-
"""Fetch the Yahoo! Japan front page and print every absolute http:// link."""
# BUG FIX: the package is bs4, not "b4" — the original import raised
# ImportError. Also ported from Python 2 (urllib2, print statements) to
# Python 3 to match the rest of this file.
from bs4 import BeautifulSoup
from urllib.request import urlopen

Site = 'http://www.yahoo.co.jp/'

soup = BeautifulSoup(urlopen(Site), "lxml")
# CSS attribute-prefix selector: anchors whose href begins with "http://".
res = soup.select('a[href^="http://"]')

for one in res:
    print(one)

print('Finish')
# Example #8
import requests
# BUG FIX: the package is bs4, not "b4" — the original import raised
# ImportError before anything else could run.
from bs4 import BeautifulSoup

url = "http://kurstenge.kz"
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")
# Rows of the nested table inside the #content container; print the raw
# text of each row (exchange-rate data, presumably — verify on the site).
rows = soup.select("#content table table tr")
for tr in rows:
    print(tr.text)