def eval_index_with_pca(index2, kind, pca):
    """Index all not-yet-indexed ORB descriptors of *kind* into a FAISS index.

    Queries Datastore for entities whose 'Indexed(ORB)' flag is "No",
    collects their ORB descriptors, projects them with the fitted *pca*,
    adds them to *index2* keyed by the (integer) entity name, persists the
    index to disk and to the bucket, and marks the entities as indexed.

    Args:
        index2: a FAISS index supporting ``add_with_ids`` (e.g. IDMap).
        kind: the Datastore kind to scan.
        pca: a fitted decomposition object with a ``transform`` method
             (e.g. faiss/sklearn PCA) applied to the raw descriptors.

    Returns:
        The (mutated) ``index2``.
    """
    datastore_client = datastore.Client('adina-image-analysis')
    q = datastore_client.query(kind=kind)
    q.add_filter('Indexed(ORB)', '=', "No")
    q.keys_only()
    q_results = list(q.fetch())
    print("len_q_results", len(q_results))
    h = 0                    # number of entities actually accumulated
    desc_all = []            # one row per descriptor, across all entities
    filenames = []           # parallel list: entity id repeated per descriptor
    looped_entities = []     # entities to flag as indexed at the end
    for key_entity in q_results:
        # Fetch the full entity directly by key (single RPC) instead of
        # running a second key-filtered query per entity, matching
        # eval_vgg_with_l2_norm_index.
        key = datastore_client.key(kind, key_entity.key.id_or_name)
        image_entity = datastore_client.get(key)
        # Re-check the flag: the keys-only query snapshot may be stale.
        if utils.check_if_indexed(image_entity, 'Indexed(ORB)'):
            continue
        desc = json.loads(image_entity['ORB Descriptors'])
        image_entity_name = image_entity.key.id_or_name
        try:
            image_entity_name = int(image_entity_name)
        except (TypeError, ValueError):
            # Non-numeric entity names cannot serve as FAISS ids; skip them.
            print(image_entity_name)
            continue
        # Each descriptor row is tagged with its owning entity's id.
        desc_all.extend(desc)
        filenames.extend([image_entity_name] * len(desc))
        looped_entities.append(image_entity)
        h += 1
    np_desc_all = np.array(desc_all).astype('float32')
    des_orb_post_pca = pca.transform(np_desc_all)
    ids = np.array(filenames).astype(int)
    try:
        index2.add_with_ids(des_orb_post_pca, ids)
    except ValueError as e:
        # add_with_ids raises on shape/id mismatch; log and continue so the
        # index file is still written.
        print(e)
        print("filenames", ids.shape, ids)
    # write the index to a file
    faiss.write_index(index2, "eval_full3_index.index")
    # write the files to the bucket
    utils.add_file_to_bucket("eval_full3_index.index")
    utils.set_indexed_Yes(looped_entities, 'Indexed(ORB)')
    print("set_indexed_Yes", h)
    return index2
def eval_vgg_with_l2_norm_index(index2, kind, batch_size):
    """Index all not-yet-indexed VGG16 descriptors of *kind*, L2-normalized.

    Scans Datastore for entities whose 'Indexed(VGG16)' flag is "No",
    L2-normalizes their VGG16 descriptors, and adds them to *index2* in
    batches of *batch_size* entities, keyed by the (integer) entity name.
    Every 10000 entities the index is checkpointed to disk/bucket and the
    processed entities are flagged as indexed.

    Bug fixes vs. the previous revision:
      * the entity that triggered a batch flush was silently dropped
        (every (batch_size+1)-th image was fetched but never indexed);
      * the final partial batch after the loop was never added at all.

    Args:
        index2: a FAISS index supporting ``add_with_ids``.
        kind: the Datastore kind to scan.
        batch_size: number of entities to accumulate per ``add_with_ids`` call.

    Returns:
        Tuple ``(index2, indexed_entities)`` where ``indexed_entities`` holds
        the entities added to the index since the last 10000-entity checkpoint
        (the list is reset at each checkpoint, as before).
    """
    datastore_client = datastore.Client('adina-image-analysis')
    q = datastore_client.query(kind=kind)
    q.add_filter('Indexed(VGG16)', '=', "No")
    q.keys_only()
    q_results = list(q.fetch())
    print("len_q_results", len(q_results))
    h = 0                    # entities processed (drives periodic logging)
    c = 0                    # entities accumulated in the current batch
    desc_all = []            # descriptor rows of the current batch
    filenames = []           # parallel entity ids, one per descriptor row
    looped_entities = []     # every entity seen (for set_indexed_Yes)
    indexed_entities = []    # entities whose batch was added since last checkpoint
    for key_entity in q_results:
        key = datastore_client.key(kind, key_entity.key.id_or_name)
        image_entity = datastore_client.get(key)
        # Re-check the flag: the keys-only query snapshot may be stale.
        if utils.check_if_indexed(image_entity, 'Indexed(VGG16)'):
            continue
        # Flush a full batch first, then fall through so the CURRENT entity
        # is still accumulated (previously it was dropped here).
        if c >= batch_size:
            np_desc_all = np.array(desc_all).astype('float32')
            ids = np.array(filenames).astype(int)
            try:
                index2.add_with_ids(np_desc_all, ids)
            except ValueError as e:
                print(e)
                print("filenames", ids.shape, ids)
            indexed_entities.extend(looped_entities)
            c = 0
            desc_all = []
            filenames = []
        desc = json.loads(image_entity['VGG16 Descriptors'])
        desc = np.array(desc)
        desc = normalize(desc, norm='l2')
        image_entity_name = image_entity.key.id_or_name
        try:
            image_entity_name = int(image_entity_name)
        except (TypeError, ValueError):
            # Non-numeric entity names cannot serve as FAISS ids; skip them.
            print(image_entity_name)
            continue
        looped_entities.append(image_entity)
        for row in range(desc.shape[0]):
            desc_all.append(convert_nparray_to_List(desc[row]))
            filenames.append(image_entity_name)
        c += 1
        h += 1
        if h % 1000 == 0:
            print("batches_index", h, "!!!")
        if h % 10000 == 0:
            # Periodic checkpoint: persist the index and flag processed
            # entities so a crash does not lose all progress.
            faiss.write_index(index2, "eval_full_vgg16_l2_index.index")
            utils.add_file_to_bucket("eval_full_vgg16_l2_index.index")
            utils.set_indexed_Yes(looped_entities, 'Indexed(VGG16)')
            print("set_indexed_Yes", h)
            indexed_entities = []
    # Flush the trailing partial batch (previously discarded).
    if desc_all:
        np_desc_all = np.array(desc_all).astype('float32')
        ids = np.array(filenames).astype(int)
        try:
            index2.add_with_ids(np_desc_all, ids)
        except ValueError as e:
            print(e)
            print("filenames", ids.shape, ids)
        indexed_entities.extend(looped_entities)
    return index2, indexed_entities