def run(self):
    """Worker loop: consume ``(sha1, in_img, push_back)`` tuples from ``q_in``,
    fetch each image (URL or local path depending on ``self.url_input``) and
    push ``(sha1, b64_buffer, push_back, error)`` to ``q_out``.

    Runs until ``q_in`` is drained. On failure, ``None`` is pushed in the
    buffer slot and the exception object is forwarded in the last slot so the
    consumer can account for the error.
    """
    # Local import: module-level import clashes with `main` in this file.
    from cufacesearch.imgio.imgio import get_buffer_from_URL, get_buffer_from_filepath, buffer_to_B64
    while not self.q_in.empty():
        try:
            # The queue should already have items, no need to block.
            (sha1, in_img, push_back) = self.q_in.get(False)
        except Exception:
            # Another worker drained the queue between empty() and get();
            # re-check the loop condition. (Was a bare `except:`, which also
            # swallowed KeyboardInterrupt/SystemExit.)
            continue
        try:
            if self.url_input:
                try:
                    img_buffer = get_buffer_from_URL(in_img)
                except Exception as inst:
                    if self.fallback_pattern:
                        # Adding fallback to Tellfinder images here.
                        # TODO: should we and how could we also update URL in DB?
                        img_buffer = get_buffer_from_URL(self.fallback_pattern.format(sha1))
                    else:
                        raise inst
            else:
                img_buffer = get_buffer_from_filepath(in_img)
            if img_buffer:
                # Push the image re-encoded as base64.
                # NOTE(review): when the buffer is falsy nothing is pushed at
                # all — downstream must not expect one output per input.
                self.q_out.put((sha1, buffer_to_B64(img_buffer), push_back, None))
        except Exception as inst:
            # Forward the failure so the consumer can log/count it.
            self.q_out.put((sha1, None, push_back, inst))
        # Mark as done
        self.q_in.task_done()
def run(self):
    """Worker loop: consume ``(url, obj_pos)`` pairs from ``q_in``, download
    the image and compute its SHA1/type/size, then push
    ``(url, obj_pos, img_buffer, img_info, start_process, end_process, inst)``
    to ``q_out``.

    ``img_buffer``/``img_info`` are ``None`` and ``inst`` carries the
    exception when the download or info extraction failed.
    """
    # Cannot use local import with main in this file
    from cufacesearch.imgio.imgio import get_SHA1_img_info_from_buffer, get_buffer_from_URL
    while not self.q_in.empty():
        try:
            # The queue should already have items, no need to block.
            url, obj_pos = self.q_in.get(False)
        except Exception:
            # Queue drained by a sibling worker between empty() and get().
            # (Was a bare `except:`.)
            continue
        img_buffer = None
        img_info = None
        inst = None
        start_process = time.time()
        try:
            img_buffer = get_buffer_from_URL(url)
            if img_buffer:
                sha1, img_type, width, height = get_SHA1_img_info_from_buffer(img_buffer)
                img_info = (sha1, img_type, width, height)
                end_process = time.time()
            else:
                end_process = time.time()
        except Exception as err:
            # Bind to a separate name: in Python 3 the `as` target is deleted
            # when the handler exits, so reusing it after the try/except
            # (as the original `as inst` did) would raise NameError.
            inst = err
            end_process = time.time()
        # Push result; consumer inspects inst/img_info to tell success apart.
        self.q_out.put((url, obj_pos, img_buffer, img_info, start_process, end_process, inst))
        # Mark as done
        self.q_in.task_done()
def process(self): from cufacesearch.imgio.imgio import get_SHA1_img_info_from_buffer, get_buffer_from_URL # Get images data and infos for sha1, url in self.get_next_img(): if (self.process_count + self.process_failed) % self.display_count == 0: avg_process_time = self.process_time / max( 1, self.process_count + self.process_failed) print_msg = "[%s] dl count: %d, failed: %d, time: %f" print print_msg % (self.pp, self.process_count, self.process_failed, avg_process_time) dict_imgs = dict() # Could we multi-thread that? start_process = time.time() if self.verbose > 2: print_msg = "[{}.process_one: info] Downloading image from: {}" print print_msg.format(self.pp, url) try: img_buffer = get_buffer_from_URL(url) if img_buffer: sha1, img_type, width, height = get_SHA1_img_info_from_buffer( img_buffer) dict_imgs[url] = { 'img_buffer': img_buffer, 'sha1': sha1, 'img_info': { 'format': img_type, 'width': width, 'height': height } } self.toc_process_ok(start_process) else: self.toc_process_failed(start_process) if self.verbose > 1: print_msg = "[{}.process_one: info] Could not download image from: {}" print print_msg.format(self.pp, url) except Exception as inst: self.toc_process_failed(start_process) if self.verbose > 0: print_msg = "[{}.process_one: error] Could not download image from: {} ({})" print print_msg.format(self.pp, url, inst) # Push to images_out_topic # Beware, this pushes a LOT of data to the Kafka topic self.images_out_topic... for img_out_msg in self.build_image_msg(dict_imgs): self.producer.send(self.images_out_topic, img_out_msg)
# Debug/comparison script: for each sha1 in list_sha1s, fetch its stored
# feature and S3 URL from HBase, featurize the image with both the
# command-line and the PyCaffe SentiBank featurizers, save both preprocessed
# images to disk, and print per-channel difference statistics.
diffs = []  # NOTE(review): never filled in this fragment.
rows = []
if list_sha1s[0]:
    hbi = HBaseIndexerMinimal(conf, prefix="HBI_")
    rows = hbi.get_columns_from_sha1_rows(list_sha1s, columns=["info:featnorm_cu", "info:s3_url"])
sbclif = SentiBankCmdLineImgFeaturizer(conf)
sbpcif = SentiBankPyCaffeImgFeaturizer(pyconf)
for row in rows:
    # row is (sha1, {column: value}) — decode the stored normalized feature.
    feat_hbase_b64 = featB64decode(row[1]["info:featnorm_cu"])
    #print feat_hbase_b64.shape
    img_url = row[1]["info:s3_url"]
    start_extr = time.time()
    img_buffer = get_buffer_from_URL(img_url)
    feat, data = sbclif.featurize(img_buffer, sha1=row[0])
    # Rewind so the second featurizer can re-read the same buffer.
    img_buffer.seek(0)
    pydata = sbpcif.preprocess_img(img_buffer)
    fpydata = pydata.flatten()
    # Assumes the preprocessed input is 3x227x227 (CaffeNet-style input).
    idata = data.reshape((3, 227, 227))
    print img_url
    print idata.shape
    print pydata.shape
    # Channel axis swapped to last for imsave.
    misc.imsave(row[0] + "_cmd.jpg", np.swapaxes(idata, 0, 2))
    misc.imsave(row[0] + "_py.jpg", np.swapaxes(pydata, 0, 2))
    # sqrt of a square is just the elementwise absolute difference.
    sqdiff = [np.sqrt((idata[c, :, :] - pydata[c, :, :])**2) for c in range(3)]
    # NOTE(review): np.sum(sqdiff[0]) looks like a typo for np.sum(sqdiff[c])
    # — the sum printed for every channel is channel 0's. Left as-is since
    # this is debug output; confirm before fixing.
    print[(np.sum(sqdiff[0]), np.mean(sqdiff[c]), np.max(sqdiff[c]), np.min(sqdiff[c])) for c in range(3)]
def process_one(self, msg):
    """Process one Kafka message: extract image URLs from the ad, download
    each image, compute SHA1/type/size, and push the enriched CDR message to
    ``self.cdr_out_topic`` (if configured).

    :param msg: Kafka message whose ``.value`` is a JSON-encoded ad object.
    """
    # Cannot use local import with main in this file
    from cufacesearch.imgio.imgio import get_SHA1_img_info_from_buffer, get_buffer_from_URL
    self.print_stats(msg)
    msg_value = json.loads(msg.value)
    # From msg value get list_urls for image objects only
    list_urls = self.get_images_urls(msg_value)
    if self.verbose > 3:
        print_msg = "[{}.process_one: info] Got {} image urls from ad id {}"
        print print_msg.format(self.pp, len(list_urls), msg_value['_id'])
    # Get images data and infos; keyed by URL, consumed by build_cdr_msg.
    dict_imgs = dict()
    # Could we multi-thread that?
    for url, obj_pos in list_urls:
        # process time is by image and not by msg...
        start_process = time.time()
        if self.verbose > 2:
            print_msg = "[{}.process_one: info] Downloading image from: {}"
            print print_msg.format(self.pp, url)
        try:
            img_buffer = get_buffer_from_URL(url)
            if img_buffer:
                sha1, img_type, width, height = get_SHA1_img_info_from_buffer(img_buffer)
                dict_imgs[url] = {'obj_pos': obj_pos, 'img_buffer': img_buffer, 'sha1': sha1,
                                  'img_info': {'format': img_type, 'width': width, 'height': height}}
                self.toc_process_ok(start_process)
            else:
                # Empty/None buffer counts as a failed download.
                self.toc_process_failed(start_process)
                if self.verbose > 1:
                    print_msg = "[{}.process_one: info] Could not download image from: {}"
                    print print_msg.format(self.pp, url)
        except Exception as inst:
            self.toc_process_failed(start_process)
            if self.verbose > 0:
                print_msg = "[{}.process_one: error] Could not download image from: {} ({})"
                print print_msg.format(self.pp, url, inst)
                sys.stdout.flush()
    # Push to cdr_out_topic
    # check if self.cdr_out_topic is empty? as this is only for DIG ingestion...
    if self.cdr_out_topic:
        self.producer.send(self.cdr_out_topic, self.build_cdr_msg(msg_value, dict_imgs))
    else:
        print_msg = "[{}.process_one: warning] cdr_out_topic is not defined"
        print print_msg.format(self.pp)
    # NB: we could have all extraction registered here,
    # and not pushing an image if it has been processed by all extractions.
    # But that violates the consumer design of Kafka...
# Push to images_out_topic for img_out_msg in self.build_image_msg(dict_imgs): self.producer.send(self.images_out_topic, img_out_msg)
def show_face_from_URL(img_url, bbox, close_after=None):
    """Fetch the image at ``img_url`` and display the face region ``bbox``.

    :param img_url: URL of the image to download.
    :param bbox: face bounding box, forwarded unchanged to ``show_face``.
    :param close_after: optional auto-close delay, forwarded to ``show_face``.
    """
    from cufacesearch.imgio.imgio import get_buffer_from_URL
    from PIL import Image
    buf = get_buffer_from_URL(img_url)
    pil_img = Image.open(buf)
    show_face(pil_img, bbox, close_after)