Exemplo n.º 1
0
   def fetch_image(self, url, referer_url, terms):
      """ Download the image referenced by the target URL and save it if it meets the size and metadata requirements.

         The image is rejected when its URL matches the class filter regex,
         when its content hash was already seen by this spider, when it is
         not a JPEG of at least the minimum width/height, or when no EXIF
         header or coordinates can be extracted from it.

         Args:
            url: A string with the uniform resource locator of the image
            referer_url: Passed unchanged to the GeoSpiderDocument that is saved
            terms: Passed unchanged to the GeoSpiderDocument that is saved

         Raises:
            gserror.FetchError: for every rejection or download/save failure
               handled below; the message names the reason.
      """
      logging.info('Started - fetch_image')

      if re.search(GeoSpider._RE_FILTER, url):
         raise gserror.FetchError('regular expression filter error')

      try:
         response = urllib2.urlopen(url)
         try:
            img_data = response.read()
         finally:
            # Always release the connection, even when the read fails.
            response.close()
      except urllib2.URLError as e:
         # Only errors carrying an HTTP status expose a 'code' attribute.
         if hasattr(e, 'code'):
            raise gserror.FetchError('{0} {1}'.format(e.code, GeoSpider.http_status_message(e.code)))
         raise gserror.FetchError('url error')
      except socket.timeout:
         raise gserror.FetchError('socket timeout')
      except socket.error:
         raise gserror.FetchError('socket error')

      # The SHA-224 digest of the raw bytes is both the duplicate key and the file name.
      img_name = hashlib.sha224(img_data).hexdigest()

      if img_name in self.seen_images:
         raise gserror.FetchError('previously crawled image error')

      # Recorded before validation, so a rejected image is not examined again.
      self.seen_images.add(img_name)

      try:
         img = Image.open(StringIO.StringIO(img_data))
      except IOError:
         raise gserror.FetchError('could not open the image error')

      if img.format != "JPEG":
         raise gserror.FetchError('incorrect image format %s' % img.format)

      if img.size[0] < GeoSpider._MIN_IMAGE_WIDTH or img.size[1] < GeoSpider._MIN_IMAGE_HEIGHT:
         raise gserror.FetchError('the image size (%d,%d) does not reach the minimum threshold' % img.size)

      try:
         exif_header = gsutils.get_exif_header(img)
      except gserror.EmptyExifHeaderError:
         raise gserror.FetchError('exif header not found')

      logging.info(termcolor.colored('exif-header: %s' % (exif_header), 'green'))

      try:
         latlng = gsutils.get_latlng(exif_header)
      except gserror.EmptyLatLngError:
         raise gserror.FetchError('coordinates not found')

      logging.info(termcolor.colored('image coordinates: (%.6f, %.6f)' % latlng, 'magenta'))
      logging.info(termcolor.colored('image size: (%d,%d)' % img.size, 'magenta'))
      logging.info(termcolor.colored('image name: {0}.jpg'.format(img_name), 'magenta'))

      # Refuse to proceed when a file with this name already exists under self.path.
      img_path = os.path.join(self.path, '{0}.jpg'.format(img_name))
      if os.path.isfile(img_path):
         raise gserror.FetchError('previously storaged image error')

      gsdoc = GeoSpiderDocument(img_name, latlng, url, referer_url,
                  terms, img, img_data, exif_header)

      # urllib2.URLError is listed before IOError so it keeps its own message.
      try:
         self.save(gsdoc)
      except gserror.GeocodeResponseError as e:
         raise gserror.FetchError('google api error {0}'.format(e))
      except urllib2.URLError:
         raise gserror.FetchError('could not connect to google map api')
      except ValueError:
         raise gserror.FetchError('value error could not save the xml document')
      except IOError:
         raise gserror.FetchError('input/output error')
      except socket.timeout:
         raise gserror.FetchError('socket timeout')
      except socket.error:
         raise gserror.FetchError('socket error')
      logging.info('Finished - fetch_image')
Exemplo n.º 2
0
      # Try to open `filename` as an image; on any failure img stays None
      # and the rest of this section is skipped for that file.
      img = None
      exif_header = None
      try:
         img = Image.open(filename)
      except:
         # NOTE(review): a bare except also swallows KeyboardInterrupt/SystemExit.
         img = None
      if img:
         # Count every file that could be opened as an image.
         img_cnt = img_cnt + 1
         try:
            exif_header = gsutils.get_exif_header(img)
         except:
            # Any failure while reading the EXIF header is treated as "no header".
            exif_header = None
            
         if exif_header:
            try:
               latlng = gsutils.get_latlng(exif_header)
            except:
               # Any failure while extracting coordinates is treated as "no coordinates".
               latlng = [None,None]
            # NOTE(review): this truthiness test also rejects a latitude or
            # longitude of exactly 0.0, not just missing values.
            if latlng[0] and latlng[1]:
               # Append a red marker for this position to the map parameter
               # string ("%7C" is the URL-encoded "|" separator).
               google_map_params = google_map_params + "&markers=color:red%7C" + "%.6f,%.6f" % (latlng[0],latlng[1]) 
               img_gps_cnt = img_gps_cnt + 1
               # Use the EXIF "Model" entry as the device name; an empty or
               # missing value is replaced by the literal string 'None'.
               exif_model = exif_header.get("Model", '')
               if len(exif_model) == 0:
                  exif_model = 'None'
               # Python 2 str.translate(table, deletechars): presumably
               # __CONTROL_CH is a 256-char identity table, so this deletes the
               # first 32 (control) characters from the name -- TODO confirm.
               exif_model = exif_model.translate(__CONTROL_CH, __CONTROL_CH[:32])
               # Tally how many geotagged images came from each device model.
               m_dev[exif_model] = m_dev.get(exif_model,0) + 1
               print "%s %s (%.6f,%.6f)" % (exif_model.ljust(45), filename.ljust(80), latlng[0], latlng[1])

# Print the summary counters only when at least one image was opened;
# each label is left-justified to 25 columns so the numbers line up.
if img_cnt > 0:
   print '\n%s%d'   % ('total images'.ljust(25),img_cnt)
   print '%s%d'     % ('images with gps'.ljust(25),img_gps_cnt)