/
downloadImageORIGINALE.py
100 lines (76 loc) · 2.45 KB
/
downloadImageORIGINALE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# -*- coding: cp1252 -*-
import json
import os
import time
import requests
from PIL import Image
from StringIO import StringIO
from requests.exceptions import ConnectionError
from accessoCartella import accessoCartella
class Download:
    """Downloads Google image-search results into a folder as resized JPEGs."""

    def go(self, query, path, numImm, googleIndex):
        """Download full size images from Google image search.

        Don't print or republish images without permission.
        I used this to train a learning algorithm.

        Result pages are requested starting at offset ``googleIndex``. Every
        result URL not seen before during this call is downloaded, scaled to
        a width of 500 pixels (aspect ratio kept) and written to ``path`` as
        ``<query><NNN>.jpg``. NNN is a zero-padded running index that starts
        at ``len(accessoCartella().leggi(path))``.

        Args:
            query: search terms; also used as the file-name prefix.
            path: destination folder; created when it does not exist.
            numImm: index/count at which downloading stops.
            googleIndex: result offset at which the search starts.

        Returns:
            The result offset reached when the method stopped, on every exit
            path, so that it can be passed back in as ``googleIndex``.
        """
        # Only the trailing offset is appended per request; the query is never
        # run through %-formatting, so a '%' inside it cannot break the URL.
        search_url = ('https://ajax.googleapis.com/ajax/services/search/images?'
                      'v=1.0&q=' + query + '&start=')
        if not os.path.exists(path):
            os.makedirs(path)
        imageFolder = accessoCartella()
        seen_urls = set()
        base_width = 500
        # presumably leggi() lists the folder's entries -- verify against
        # accessoCartella; only the length of its result is used here.
        stored = len(imageFolder.leggi(path))
        # Google's start query string parameter for pagination.
        start = googleIndex
        print("start Iniziale: " + str(start))
        next_index = stored
        while stored < numImm:
            r = requests.get(search_url + '%d' % start)
            response_data = json.loads(r.text).get('responseData') or {}
            results = response_data.get('results') or []
            if not results:
                # Nothing (more) to fetch: without this exit the loop would
                # keep polling the service forever.
                break
            for image_info in results:
                print("start ciclo: " + str(start))
                url = image_info['unescapedUrl']
                # Every examined result moves the offset forward exactly once,
                # whether it is a duplicate, a failure or a success.
                start += 1
                if url in seen_urls:
                    continue
                seen_urls.add(url)
                try:
                    image_r = requests.get(url)
                except requests.exceptions.RequestException:
                    # The file index is left alone: nothing was written for
                    # this result, so the next image must not reuse the name
                    # of the previously saved one.
                    print('could not download %s' % url)
                    continue
                title = query + str(next_index).zfill(3)
                file_path = os.path.join(path, title + '.jpg')
                if not self._save_resized(image_r.content, file_path,
                                          base_width):
                    # Throw away some gifs...blegh.
                    print('could not save %s' % url)
                    continue
                next_index += 1
                print("j: " + str(next_index))
                # Stop as soon as the requested number of images is reached.
                if next_index >= numImm:
                    return start
            time.sleep(1.5)
            stored = len(imageFolder.leggi(path))
        return start

    def _save_resized(self, content, file_path, base_width):
        """Decode ``content``, scale it to ``base_width`` and save it as JPEG.

        Returns True when ``file_path`` was written. Returns False when the
        data cannot be decoded or encoded (IOError); in that case no new or
        truncated file is left behind.
        """
        # Decode and resize before touching the disk, so undecodable data
        # never creates (or truncates) the target file.
        try:
            image = Image.open(StringIO(content))
            scale = base_width / float(image.size[0])
            height = int(float(image.size[1]) * scale)
            image = image.resize((base_width, height), Image.ANTIALIAS)
        except IOError:
            return False
        written = True
        with open(file_path, 'wb') as out:
            try:
                image.save(out, 'JPEG')
            except IOError:
                written = False
        if not written:
            # Removed only after the handle is closed.
            os.remove(file_path)
        return written
# Example use