def pull_avitar(self):
    """Download the profile image and store its pypuzzle vector as an asset.

    Registers a new ``.png`` asset whose name combines
    ``generateMD5Hash(content=self.profile_image_url)`` with the current
    timestamp, fetches ``self.profile_image_url`` into that asset with
    ``wget``, then computes the image's pypuzzle vector and saves it as a
    second, JSON asset.

    Returns:
        True when the vector was computed and handed to ``addAsset`` without
        an exception; False when the image asset could not be registered or
        any step of the vector computation raised.
    """
    # shlex.quote is the Python 3 spelling; pipes.quote is the Python 2 one.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    print(self.emit())

    t = time()
    avi = self.addAsset(
        None,
        "%s_%d.png" % (generateMD5Hash(content=self.profile_image_url), t),
        description="user's avitar at %d" % t,
        tags=[ASSET_TAGS['FD_AVI']])

    if avi is None:
        return False

    avi_path = os.path.join(ANNEX_DIR, avi)

    # The URL is external data. Quote both arguments so shell metacharacters
    # (e.g. '&' or ';' in a query string) are passed to wget literally
    # instead of being interpreted by the shell that `local` spawns.
    # "%s" % ... keeps the original string conversion of the URL.
    download_cmd = "wget -O %s %s" % (
        shell_quote(avi_path),
        shell_quote("%s" % self.profile_image_url))

    # warn_only: a failed download must not abort; the vector step below
    # will then fail and be reported through the except branch.
    with settings(warn_only=True):
        local(download_cmd)

    import pypuzzle
    puzz = pypuzzle.Puzzle()

    try:
        cvec = puzz.get_cvec_from_file(avi_path)
        self.addAsset(cvec, "avitar_image_cvec_%d.json" % t,
                      as_literal=False, tags=[ASSET_TAGS['IMAGE_CVEC']])
        return True
    except Exception as e:
        if DEBUG:
            print("Could not get image vector because %s" % e)

    return False
def visually_dedupe_emotes(self):
    """Merge emotes whose images have a pypuzzle distance of zero or less.

    Emotes are visited subreddit by subreddit (in ``self.subreddits`` order).
    Each non-animated emote is compared against every emote processed so far,
    including those from earlier subreddits. When the distance is not greater
    than zero the newer emote is merged into the earlier one via
    ``self._merge_emotes`` and recorded as a duplicate. Finally
    ``self.emotes`` is rebuilt without the duplicates.
    """
    logger.info('Beginning visually_dedupe_emotes()')

    hasher = pypuzzle.Puzzle()
    # Some images like 'minigunkill' got a generic vector (a vector consisting
    # of only zeros). These images were merged with other images that also got
    # a generic vector. Setting the noise cutoff fixed this.
    hasher.set_noise_cutoff(0)

    seen = []         # (emote, compressed vector) pairs already examined
    merged_away = []  # emotes that were folded into an earlier emote

    for subreddit in self.subreddits:
        in_subreddit = [e for e in self.emotes if e['sr'] == subreddit]
        logger.info('Visually dedupeing emotes in subreddit ' + subreddit)

        for candidate in in_subreddit:
            if candidate in merged_away:
                continue

            # Ignore animations as they sometimes start with a blank
            # (transparent) frame. Only the first frame is checked, so they
            # would look the same as any other blank picture.
            if candidate['base_img_animation']:
                continue
            if has_hover(candidate) and candidate['hover_img_animation']:
                continue

            image_path = get_single_image_path(self.output_dir, candidate)
            logger.debug('puzzle.get_cvec_from_file(' + image_path + ')')
            signature = hasher.get_cvec_from_file(image_path)

            for earlier, packed in seen:
                earlier_signature = hasher.uncompress_cvec(packed)
                if earlier in merged_away:
                    continue

                gap = hasher.get_distance_from_cvec(signature,
                                                    earlier_signature)
                if gap > 0:
                    # Images are not equal.
                    continue

                # Images are equal! Let's merge them.
                self._merge_emotes(earlier, candidate)
                merged_away.append(candidate)

            seen.append((candidate, hasher.compress_cvec(signature)))

    self.emotes = [kept for kept in self.emotes if kept not in merged_away]
def main():
    """Command-line entry point: move near-duplicate images out of a dataset.

    Arguments (parsed from ``sys.argv``):
        -i / --dataset_path  dataset root; ``images/`` and ``labels/`` are
                             read from it and ``duplicate/`` is written to it.
        -t / --threshold     float, default 0.2; reference distance used by
                             the duplicate criterion below.

    For every pair of images whose files both still exist, the pypuzzle
    distance between their vectors is compared with the threshold. On a match
    the second image and its label file are moved into ``duplicate/`` and a
    line is appended to ``duplicate/duplicates.log``.

    Exits with status -1 when ``utils.valid_dataset`` rejects the dataset.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--dataset_path", type=str, required=True)
    parser.add_argument("-t", "--threshold", type=float, required=False,
                        default=0.2)
    args = parser.parse_args()

    features_path = args.dataset_path + "/images/"
    labels_path = args.dataset_path + "/labels/"
    duplicates_path = args.dataset_path + "/duplicate/"

    if not utils.valid_dataset(args.dataset_path):
        print("Invalid dataset")
        sys.exit(-1)

    utils.make_dirs([duplicates_path])

    images = utils.collect_images(features_path)
    vectors = collect_vectors(images)
    vector_count = len(vectors)

    # The context manager guarantees the log is flushed and closed, including
    # when a move fails part-way through the scan.
    with open(duplicates_path + "/duplicates.log", "w") as duplicate_log:
        puzzle = pypuzzle.Puzzle()
        for i in range(vector_count - 1):
            for j in range(i + 1, vector_count):
                # Either file may already have been moved by an earlier match.
                if not utils.exists_paths([images[i], images[j]]):
                    continue

                # NOTE(review): this flags a pair only when its distance lies
                # within 0.01 of --threshold, so a pair with a distance near 0
                # (presumably identical images) is NOT flagged with the
                # default 0.2. If "distance <= threshold" was intended, change
                # it deliberately: it would move many more files.
                threshold = abs(
                    puzzle.get_distance_from_cvec(vectors[i], vectors[j]) -
                    args.threshold)
                if threshold > 0.01:
                    continue

                image_name = os.path.basename(images[j])
                shutil.move(images[j], duplicates_path + image_name)
                shutil.move(
                    labels_path + image_name,
                    args.dataset_path + "/duplicate/label_" + image_name)
                duplicate_log.write("Duplicate " + str(images[i]) + " " +
                                    str(images[j]) + " threshold " +
                                    str(threshold) + '\n')
def get_image_vector(uv_task):
    """Compute the pypuzzle vector of a task's document and save it as an asset.

    Loads the ``UnveillanceDocument`` identified by ``uv_task.doc_id``,
    computes the vector of its file under ``ANNEX_DIR`` and stores it as the
    ``image_cvec.json`` asset. The task is marked failed (with a message) when
    the vector cannot be computed or the asset cannot be saved, and finished
    otherwise. Progress banners are printed to stdout.
    """
    task_tag = "AVI: GETTING IMAGE VECTOR"

    def _abort(error_msg):
        # Shared failure path: report, print the error banner, fail the task.
        print(error_msg)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail(message=error_msg)

    print("\n\n************** %s [START] ******************\n" % task_tag)
    uv_task.setStatus(302)

    from lib.Worker.Models.uv_document import UnveillanceDocument
    from conf import ANNEX_DIR
    import pypuzzle

    document = UnveillanceDocument(_id=uv_task.doc_id)
    signer = pypuzzle.Puzzle()

    try:
        image_path = os.path.join(ANNEX_DIR, document.file_name)
        signature = signer.get_cvec_from_file(image_path)
    except Exception as e:
        _abort("Could not get image vector because %s" % e)
        return

    from vars import ASSET_TAGS

    saved = document.addAsset(signature, "image_cvec.json", as_literal=False,
                              tags=[ASSET_TAGS['IMAGE_CVEC']])
    if not saved:
        _abort("could not save cvec asset!")
        return

    print("\n\n************** %s [END] ******************\n" % task_tag)
    uv_task.finish()
def collect_vectors(images):
    """Return the pypuzzle vector of every image path in ``images``, in order.

    A single ``pypuzzle.Puzzle`` instance is shared across all files.
    """
    signer = pypuzzle.Puzzle()
    vectors = []
    for image_path in images:
        vectors.append(signer.get_cvec_from_file(image_path))
    return vectors
def setUp(self):
    """Attach a new ``pypuzzle.Puzzle`` instance to ``self.puzzle``.

    Presumably a unittest fixture hook; the enclosing class is not visible
    from here.
    """
    fresh_puzzle = pypuzzle.Puzzle()
    self.puzzle = fresh_puzzle
def compare_avis(uv_task):
    """Compare the stored image vectors of exactly two documents.

    ``uv_task.avis`` must hold two document ids. Each is loaded as an
    ``UnveillanceDocument``, its ``image_cvec.json`` asset is parsed, and the
    pypuzzle distance between the two vectors is saved on the task as
    ``compare_avi_output.json`` together with each document's file name and
    id.

    Every failure prints the message and an error banner, marks the task
    failed and returns None. All failures use status 412 except the final
    "could not save" one, which passes no status. On success the task is
    finished.
    """
    task_tag = "CLUSTER: COMPARING 2 AVIS"

    def _abort(error_msg, **fail_kwargs):
        # Shared failure path: report, print the error banner, fail the task.
        print(error_msg)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail(message=error_msg, **fail_kwargs)

    print("\n\n************** %s [START] ******************\n" % task_tag)
    uv_task.setStatus(302)

    # The length check previously read len(uv_task.avis != 2), which takes
    # len() of a bool and raises TypeError for every task that has `avis`.
    if not hasattr(uv_task, 'avis') or len(uv_task.avis) != 2:
        _abort("Cannot compare anything.", status=412)
        return

    from lib.Worker.Models.uv_document import UnveillanceDocument

    try:
        avis = [UnveillanceDocument(_id=a) for a in uv_task.avis]
    except Exception as e:
        _abort("could not load up avis as UnveillanceDocuments: %s" % e,
               status=412)
        return

    # ANNEX_DIR is not referenced below; the import is retained unchanged.
    from conf import ANNEX_DIR
    from vars import ASSET_TAGS
    from json import loads

    import pypuzzle
    puzz = pypuzzle.Puzzle()

    try:
        cvecs = [loads(a.loadAsset("image_cvec.json")) for a in avis]
        compare_avi = puzz.get_distance_from_cvec(*cvecs)
    except Exception as e:
        _abort("could not get one or more image vectors because %s" % e,
               status=412)
        return

    # isinstance also accepts int/float subclasses, unlike an exact type test.
    if not isinstance(compare_avi, (int, float)):
        _abort("non-numerical result for comparaison.", status=412)
        return

    c_map = {
        'avis': [{'file_name': a.file_name, '_id': a._id} for a in avis],
        'compared': compare_avi
    }

    if not uv_task.addAsset(c_map, "compare_avi_output.json",
                            as_literal=False, tags=[ASSET_TAGS['C_RES']]):
        _abort("could not save result asset to this task.")
        return

    print("\n\n************** %s [END] ******************\n" % task_tag)
    uv_task.finish()
def matchImg(eva, evb):
    """Return the pypuzzle distance between two serialized compressed vectors.

    ``eva`` and ``evb`` are ':'-separated strings of integers; each is parsed
    into a tuple of ints and uncompressed before the distance is computed.
    """
    engine = pypuzzle.Puzzle()

    def _decode(serialized):
        # "1:2:3" -> (1, 2, 3) -> uncompressed vector
        packed = tuple(int(part) for part in serialized.split(":"))
        return engine.uncompress_cvec(packed)

    first_vec = _decode(eva)
    second_vec = _decode(evb)
    return engine.get_distance_from_cvec(first_vec, second_vec)
def ImgCVEC(path):
    """Return the compressed pypuzzle vector of the image at ``path`` as text.

    The vector is computed from the file, compressed, and its elements are
    joined with ':' after conversion with ``str``.
    """
    engine = pypuzzle.Puzzle()
    packed = engine.compress_cvec(engine.get_cvec_from_file(path))
    return ':'.join(str(element) for element in packed)