This repository has been archived by the owner on Feb 13, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
scraper.py
78 lines (62 loc) · 2.33 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Download every HDRI in a given category from HDRI Haven (hdrihaven.com)
# Importing Modules
import requests
import os
from sys import argv
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from urllib.request import urlretrieve
from urllib.request import URLopener
from fake_useragent import UserAgent
# Arguments: <resolution> <category> <tonemappedjpg>
#   resolution    - suffix appended to every HDRI file name before the extension
#   category      - value sent as the `c` query parameter of the listing page;
#                   also names the output folder ("<category> HDRI")
#   tonemappedjpg - yes/no flag choosing which preview image is saved per HDRI
if len(argv) != 4:
    # Exit with a usage hint instead of an opaque tuple-unpacking ValueError.
    raise SystemExit(
        "Usage: python scraper.py <resolution> <category> <tonemappedjpg: y/n>"
    )
name, resolution, category, tonemappedjpg = argv

# File count (incremented once per HDRI that is downloaded successfully)
filesnum = 0

print(f"Resolution {resolution}")
print(f"Category: {category}")
print(f"Tonemapped JPG: {tonemappedjpg}")

ua = UserAgent()
# Read the user-agent string once so the page request and the file downloads
# present the same header.
user_agent = ua.chrome

# NOTE(review): urllib.request.URLopener has been deprecated since Python 3.3;
# it is kept here because the download loop below calls opener.retrieve().
opener = URLopener()
opener.addheader('User-Agent', user_agent)

url = 'https://hdrihaven.com/hdris/'
try:
    # Passing the category through `params` lets requests URL-encode it, and
    # the timeout keeps an unresponsive server from hanging the script forever.
    r = requests.get(
        url,
        params={'c': category},
        allow_redirects=True,
        headers={'User-Agent': user_agent},
        timeout=30,
    )
    # Fail loudly on 4xx/5xx rather than scraping an error page and
    # reporting "0 files downloaded".
    r.raise_for_status()
except requests.RequestException as err:
    raise SystemExit(f"Could not load {url} for category {category!r}: {err}")
soup = BeautifulSoup(r.text, 'html.parser')

# All downloads are written into "<category> HDRI" under the current directory.
save_to = category + ' HDRI'
try:
    os.mkdir(save_to)
except FileExistsError:
    # Re-running for the same category reuses the existing folder; any other
    # failure (e.g. missing permissions) is allowed to surface here.
    pass
os.chdir(save_to)
# Walk every link inside the listing grid, download the HDRI it points to
# (.hdr first, .exr as fallback) and then one preview image for it.

# Accept any capitalisation of "y"/"yes"; every other value selects thumbnails.
want_tonemapped = tonemappedjpg.lower() in ('y', 'yes')

hdris = soup.select('#item-grid a')
for hdri in hdris:
    thumbs = hdri.select('.thumbnail')
    if not thumbs or not thumbs[0].get('data-src') or not hdri.get('href'):
        # An anchor without a thumbnail or a target cannot be processed;
        # skip it instead of aborting the whole run with IndexError/KeyError.
        continue
    thumbnail = thumbs[0]['data-src']
    href = urlparse(hdri['href'])

    # The HDRI identifier is the value of the `h` query parameter. Reading it
    # by key works regardless of parameter order or of what `c` is set to.
    slug = ''
    for pair in href.query.split('&'):
        key, sep, value = pair.partition('=')
        if key == 'h':
            slug = value
            break
    if slug:
        new_filename = slug + '_' + resolution
    else:
        # No `h` parameter: fall back to the positional derivation, which
        # assumes the query has the form "c=<category>&h=<id>".
        filename = href.query[2:] + '_' + resolution
        new_filename = filename.replace(category + '&h=', '')

    tonemapped = thumbnail.replace('/files/hdri_images/thumbnails/', '')
    dl_url = 'https://hdrihaven.com/files/hdris/' + new_filename
    # Strip the leading slash so a root-relative data-src does not produce
    # "https://hdrihaven.com//files/...".
    thumbnail_url = 'https://hdrihaven.com/' + thumbnail.lstrip('/')
    tonemapped_url = 'https://hdrihaven.com/files/hdri_images/tonemapped/8192/' + tonemapped

    print(f"\n{new_filename} - {dl_url}")
    try:
        print(f"{new_filename}.hdr downloading...")
        ext = '.hdr'
        opener.retrieve(dl_url + ext, new_filename + ext)
        filesnum += 1
    except Exception:
        print(f"{new_filename}.hdr download failed, trying .exr...")
        try:
            ext = '.exr'
            opener.retrieve(dl_url + ext, new_filename + ext)
            filesnum += 1
        except Exception:
            print(f"{new_filename} download failed. Continuing...\n")
            # Neither format could be fetched, so skip the preview as well.
            continue

    if want_tonemapped:
        print(f"8K Tonemapped {tonemapped} downloading...")
        preview_url = tonemapped_url
    else:
        print("Thumbnail downloading...")
        preview_url = thumbnail_url
    try:
        opener.retrieve(preview_url, os.path.basename(preview_url))
    except Exception as err:
        # A missing preview must not abort the remaining HDRI downloads.
        print(f"Preview image download failed ({err}). Continuing...")

print(f"\nDownload completed. {filesnum} files downloaded.")