Пример #1
0
from bs4 import BeautifulSoup
import requests
import urllib.parse as parse
import os.path as path
import urls

# Download every image embedded in one Naver blog post and store each file
# under IMAGE_DIR, using urls.getFilename(src) for the file name.
import os  # needed only for os.makedirs below

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10
IMAGE_DIR = "/Users/lhj/images/"

url = "https://blog.naver.com/PostView.nhn?blogId=korea_diary&logNo=221433346994&redirect=Dlog&widgetTypeCall=true&topReferer=https%3A%2F%2Fwww.naver.com%2F&directAccess=false"

res = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the post.
res.raise_for_status()
soup = BeautifulSoup(res.text, 'html.parser')

sel = "img.se-image-resource"

imgs = soup.select(sel)

if not imgs:
    # Nothing to download. SystemExit is raised directly because the bare
    # exit() helper is installed by the site module and is not always present.
    raise SystemExit

img = imgs[0]
src = img.get('src')
print('img>>>', src)

# open() does not create missing directories, so make sure the target exists.
os.makedirs(IMAGE_DIR, exist_ok=True)

for img in imgs:
    src = img.get('src')
    print("img>>", src)
    if not src:
        # An <img> without a src attribute has nothing to fetch.
        continue
    # Fetch before opening the file so a failed download never leaves an
    # empty or error-page file on disk.
    try:
        img_res = requests.get(src, timeout=REQUEST_TIMEOUT)
        img_res.raise_for_status()
    except requests.RequestException as err:
        # Best effort: report the failure and keep going with the other images.
        print("img!! skipped", src, err)
        continue
    with open(IMAGE_DIR + urls.getFilename(src), "wb") as out_file:
        out_file.write(img_res.content)
Пример #2
0
# Fetch the original post page, work out its title, and download every image
# in it into IMAGE_DIR.
# NOTE(review): bbsUrl and the initial src are defined by earlier code that is
# not visible in this fragment; they are used here unchanged.
import os  # needed only for os.makedirs below

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10
IMAGE_DIR = "./images/"

orgUrl = urls.urljoin(urls.getHostname(bbsUrl, True), src)

orgRes = requests.get(orgUrl, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the post.
orgRes.raise_for_status()
orgHtml = orgRes.text
orgSoup = BeautifulSoup(orgHtml, 'html.parser')

# Try the primary title selector first, then fall back to the second layout.
titleSel = "div.se-title-text span"
titleEle = orgSoup.select_one(titleSel)
if titleEle is None:
    titleEle = orgSoup.select_one('div.se_title')

if titleEle is not None:
    title = titleEle.text.strip()
else:
    title = 'Title 못찾음!! ' + bbsUrl

sel = "img.se-image-resource"
imgs = orgSoup.select(sel)

if not imgs:
    # Nothing to download. SystemExit is raised directly because the bare
    # exit() helper is installed by the site module and is not always present.
    raise SystemExit

# open() does not create missing directories, so make sure the target exists.
os.makedirs(IMAGE_DIR, exist_ok=True)

print("--------------------------------------", title)
for img in imgs:
    src = img.get('src')
    print("img>>", src)
    if not src:
        # An <img> without a src attribute has nothing to fetch.
        continue
    # Fetch before opening the file so a failed download never leaves an
    # empty or error-page file on disk.
    try:
        img_res = requests.get(src, timeout=REQUEST_TIMEOUT)
        img_res.raise_for_status()
    except requests.RequestException as err:
        # Best effort: report the failure and keep going with the other images.
        print("img!! skipped", src, err)
        continue
    with open(IMAGE_DIR + urls.getFilename(src), "wb") as out_file:
        out_file.write(img_res.content)
Пример #3
0
from bs4 import BeautifulSoup
import requests
import urllib.parse as parse
import os.path as path
import urls

# Download every image embedded in one Naver blog post and store each file
# under IMAGE_DIR, using urls.getFilename(src) for the file name.
import os  # needed only for os.makedirs below

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10
IMAGE_DIR = "./crawl/"

url = "https://blog.naver.com/PostView.nhn?blogId=korea_diary&logNo=221433346994&redirect=Dlog&widgetTypeCall=true&topReferer=https%3A%2F%2Fwww.naver.com%2F&directAccess=false"
res = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the post.
res.raise_for_status()
soup = BeautifulSoup(res.text, 'html.parser')

# Select by class so every <img> carrying it is matched, rather than one
# element addressed through a specific id path.
sel = "img.se-image-resource"

imgs = soup.select(sel)

if not imgs:
    # Nothing to download. SystemExit is raised directly because the bare
    # exit() helper is installed by the site module and is not always present.
    raise SystemExit

# open() does not create missing directories, so make sure the target exists.
os.makedirs(IMAGE_DIR, exist_ok=True)

print("--------------------------------------")
for img in imgs:
    src = img.get('src')
    print("img>>", src)
    if not src:
        # An <img> without a src attribute has nothing to fetch.
        continue
    # Fetch before opening the file so a failed download never leaves an
    # empty or error-page file on disk.
    try:
        img_res = requests.get(src, timeout=REQUEST_TIMEOUT)
        img_res.raise_for_status()
    except requests.RequestException as err:
        # Best effort: report the failure and keep going with the other images.
        print("img!! skipped", src, err)
        continue
    with open(IMAGE_DIR + urls.getFilename(src), "wb") as out_file:
        out_file.write(img_res.content)