Example #1
class ImageSearcher():
    '''Image searcher API for clothes retrieval web demo'''
    def __init__(self):
        root_path = os.path.dirname(__file__)
        tree_path = os.path.abspath(os.path.join(root_path, 'db/tree'))
        inds_path = os.path.abspath(os.path.join(root_path, 'db/index'))
        feature_path = os.path.abspath(os.path.join(root_path, 'db/feature.npy'))
        self.searcher = Searcher(tree_path, inds_path, feature_path)
        

    def search(self, image_path, do_detection=0, k=20): 
        t1 = Timer()
        t1.tic()
        #queryFeatures = descriptor.get_descriptor(image_path, multi_box=False)
        queryFeatures = descriptor.get_descriptor(image_path,do_detection=do_detection)
        
        t1.toc('Feature Extraction time: ')
        t2 = Timer()
        t2.tic()
        results, dists, ind = self.searcher.search(queryFeatures,k=k)
        #self.queryExpansion(results, dists, ind)
        t2.toc('Knn search time: ')
        result = []
        dist = []
        for j,imageName in enumerate(results):
            if imageName not in result:
                result.append(imageName)
                dist.append(dists[j])
        return result[:k],dist[:k]
        
    def queryExpansion(self, results, dists, ind, threshold=0.8, k=10, top=4):
        """
        Do query expansion using at most `top` nearest neighbours (4 by default)
        """
        features = self.searcher.features
        feature = []
        for i,dist in enumerate(dists):
            if dist < threshold and i < top:
                feature.append(features[ind[i]])
        if len(feature) == 0:
            return 0
        query = np.mean(np.array(feature), axis=0)
        new_results, new_dists, new_ind = self.searcher.search(query,k=k)
        for i,dist in enumerate(new_dists):
            if dist > dists[-1]:
                break
            for j,d in enumerate(dists):
                if dist < d:
                    results.insert(j, new_results[i])
                    dists.insert(j, dist)
                    break 
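A minimal usage sketch for this class, assuming the db/ files exist and the descriptor and Searcher dependencies are importable ('query.jpg' is a placeholder path, not part of the original):

# Hypothetical usage; 'query.jpg' is a placeholder.
searcher = ImageSearcher()
names, distances = searcher.search('query.jpg', do_detection=1, k=10)
for name, d in zip(names, distances):
    print name, d  # Python 2 print, as in the project's demo scripts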
Example #2
class TestSearch(unittest.TestCase):

    doc1 = {'path': '/foo/doc1',
            'keywords': 'document,one,foo',
            'title': 'Document one title.',
            'text': 'This is the a test document. Wot!'}

    doc2 = {'path': '/foo/doc2',
            'keywords': 'document,two,bar',
            'title': 'Document two title.',
            'text': 'Testing is cool, yo.'}

    def setUp(self):
        self.searcher = Searcher(tempfile.mkdtemp())
        self.searcher.add_documents(json.dumps((self.doc1, self.doc2)))

    def test_search_by_text(self):
        results = self.searcher.search('Wot')
        self.assertEqual(1, len(results))
        for fieldname, fieldvalue in results[0].items():
            self.assertEqual(self.doc1.get(fieldname), fieldvalue)

        # Now stemming. Document 2 only has "testing" in its body but it'll be
        # matched anyway because the "text" field of the schema uses the
        # StemmingAnalyzer.
        results = self.searcher.search('test')
        self.assertEqual(2, len(results))
        matched_doc_paths = [hit['path'] for hit in results]
        self.assertIn('/foo/doc1', matched_doc_paths)
        self.assertIn('/foo/doc2', matched_doc_paths)

    def test_search_by_keyword(self):
        # Simple search by a single keyword.
        results = self.searcher.search('keywords:one')
        self.assertEqual(1, len(results))  # doc1
        self.assertEqual(results[0]['path'], '/foo/doc1')

        # By two keywords.
        results = self.searcher.search('keywords:one,foo')
        self.assertEqual(1, len(results))  # doc1
        self.assertEqual(results[0]['path'], '/foo/doc1')

        # Keyword intersection.
        results = self.searcher.search('keywords:one,two')
        self.assertEqual(0, len(results))  # There are none.

        # Search for docs with a "one" keyword and "yo" in the body.
        results = self.searcher.search('keywords:one yo')
        self.assertEqual(0, len(results))  # There are none.

        # Anything with the "document" keyword.
        results = self.searcher.search('keywords:document')
        self.assertEqual(2, len(results))  # Both docs.

    def test_search_by_title(self):
        results = self.searcher.search('title:"Document one"')
        self.assertEqual(1, len(results))  # doc1
        self.assertEqual(results[0]['path'], '/foo/doc1')
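The comments above imply a schema whose text field uses a stemming analyzer; a minimal sketch of such a schema, assuming the Whoosh library (suggested by the StemmingAnalyzer reference) — the exact field options are assumptions:

from whoosh.analysis import StemmingAnalyzer
from whoosh.fields import ID, KEYWORD, Schema, TEXT

# Hypothetical schema consistent with the tests: a stored path identifier,
# comma-separated keywords, and a stemmed full-text body.
schema = Schema(path=ID(stored=True, unique=True),
                keywords=KEYWORD(stored=True, commas=True),
                title=TEXT(stored=True),
                text=TEXT(stored=True, analyzer=StemmingAnalyzer()))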
Example #3
File: demo.py Project: PierreHao/QScode
def test():
    start = ''
    print 'press q to exit'
    args = parse_args()
    index = cPickle.loads(open(args.index).read())
    searcher = Searcher(index)
    while True:
        start = raw_input('please input image address: ')       
        if start == 'q':
            break
        #start = 'query-images/' + start
        try:
            queryImage = cv2.imread(start)
            queryFeatures = clothes.run(start)
            results = searcher.search(queryFeatures)
            result = []
            result.append(queryImage)
            for j in xrange(0, 15):
                # grab the result (we are using row-major order) and
                # load the result image
                (score, imageName) = results[j]
                #path = args.dataset + "/%s" % (imageName)
                result.append(cv2.imread(imageName))
                print "\t%d. %s : %.3f" % (j + 1, imageName, score)
            plot(result)
        
            plt.show()
        except Exception as e:
            print "error:", e
Example #4
class Index(object):

    def __init__(self, cache_host=None, cache_port=None, db_file_path=None,
                 db_url=None, load_from_db=None):
        cache = create_index_cache(host=cache_host, port=cache_port)
        db = create_index_store(file_path=db_file_path, url=db_url)

        self.reader = IndexReader(db, cache)
        self.writer = IndexWriter(db, cache)
        self.searcher = Searcher(self.reader)

        if load_from_db:
            self.load_from_db()

    def search(self, query):
        return self.searcher.search(query)

    def commit(self):
        self.writer.db.commit()
        self.load_from_db()

    def load_from_db(self):
        # Refresh data in reader with data from database
        self.reader.load_from_db()

        # Push the new data into the cache
        self.writer.build_cache(self.reader.doc_word_scores)

    def get_writer(self):
        # renamed: a method named `writer` would be shadowed by the
        # `self.writer` attribute assigned in __init__
        return self.writer

    def get_reader(self):
        # renamed for the same reason as get_writer
        return self.reader
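A brief usage sketch, assuming file-backed storage; the path and query string are placeholders, and create_index_cache/create_index_store are the project's own factories:

# Hypothetical usage; the db_file_path value and query format are assumptions.
index = Index(db_file_path='/tmp/search.db', load_from_db=True)
hits = index.search('example query')  # delegates to Searcher.search
index.commit()                        # persists, then reloads reader and cache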
Example #5
File: demo.py Project: PierreHao/QScode
def testSave():
    args = parse_args()
    searcher = Searcher('data/tree','data/index')
    imagesDir = args.query
    ext = ['jpg','png','jpeg','JPG','PNG','JPEG']
    images = os.listdir(imagesDir)
    flag = 1
    import time
    start = time.time()
    for image in images:
        imType = image.split('.')[-1]
        if imType in ext:
            image_path = os.path.join(imagesDir,image)
            try:
                queryImage = cv2.imread(image_path)
                queryFeatures = descriptor.get_descriptor(image_path, multi_box=False)
                results = searcher.search(queryFeatures)
                result = []
                result.append(queryImage)
                for j in xrange(0, 14):
                    # grab the result (we are using row-major order) and
                    # load the result image
                    imageName = results[j]
                    #path = args.dataset + "/%s" % (imageName)
                    #print os.path.join(args.dataset,imageName)
                    result.append(cv2.imread(os.path.join(args.dataset,imageName)))
                    #print "\t%d. %s : %.3f" % (j + 1, imageName, score)
                plot(result,flag,args.save)
                flag += 1
            except Exception as e:
                print 'error with', image_path, ':', e
    print 'Total time: ', time.time() - start
Example #6
File: demo.py Project: PierreHao/QScode
def test():
    start = ''
    print 'press q to exit'
    args = parse_args()
    clothes = feature.Clothes(init_pca=1)
    index = cPickle.loads(open(args.index).read())
    searcher = Searcher(index)
    while True:
        start = raw_input('please input image address: ')       
        if start == 'q':
            break
        start = 'query-images/' + start
        try:
            queryImage = cv2.imread(start)
            queryFeatures = descriptor.get_descriptor(start, multi_box=False)
            results = searcher.search(queryFeatures)
            result = []
            result.append(queryImage)
            for j in xrange(0, 15):
                # grab the result (we are using row-major order) and
                # load the result image
                imageName = results[j]
                #path = args.dataset + "/%s" % (imageName)
                result.append(cv2.imread(imageName))
            plot(result)
        
            plt.show()
        except Exception as e:
            print "error:", e
Example #7
def search():
    query = request.form['query']
    field = request.form['field']
    searcher = Searcher()
    result = searcher.search(query, field)
    return render_template("results.html",
                           query=query,
                           videos=result["videos"])
Example #8
    def colorSearch(self):
        '''
        Searches query image against index and returns the specified number of matches.
        Results are in the format (chi-squared distance, image name).
        '''
        searcher = Searcher(self.colorIndex)
        queryFeatures = self.createHistogram(self.image)

        results = searcher.search(queryFeatures)
        return results
Example #9
File: mainapp.py Project: aknott/scearch
def mainapp():
    result = ""
    input = request.args.get('searchstr')
    sorttype = request.args.get('sortselect')
    results = []
    if input is not None and sorttype is not None and input != "":
        searcher = Searcher(input, sorttype)
        results = searcher.search()
    if input is None:
        input = ""
    widgets = [r.widget for r in results]
    #return render_template('index.html', input=input,result="".join(widgets))
    return render_template('index2.html', input=input, results=results)
Example #10
File: demo.py Project: PierreHao/QScode
def testSave():
    args = parse_args()
    #searcher = ImageSearcher()
    searcher = Searcher(args.index, args.feature)
    imagesDir = args.query
    ext = ['jpg','png','jpeg','JPG','PNG','JPEG']
    images = os.listdir(imagesDir)
    os.system('rm %s' % (args.save + '/*'))  # clear previous results
    f = open(os.path.join(args.save,'info.txt'),'w')
    flag = 1
    import time
    start = time.time()
    for image in images:
        imType = image.split('.')[-1]
        if imType in ext:
            image_path = os.path.join(imagesDir,image)
            try:
                queryImage = cv2.imread(image_path)
                queryFeatures = descriptor.get_descriptor(image_path)
                results,dists,ind = searcher.search(queryFeatures,k=15)
                #results,dists = searcher.search(image_path, do_detection=1, k=15)
                result = []
                result.append(queryImage)
                dist = []
                dist.append(1)
                print '~~~~~~~~~~~~~~~',flag,'~~~~~~~~~~~~~~~~~~~'
                f.write(str(flag)+'.~~~~~~~~~~~~~~~~'+'\n')
                f.write(image_path+'\n')
                for j in xrange(0, 15):
                    # grab the result (we are using row-major order) and
                    # load the result image
                    score,imageName = dists[j], results[j]
                    #path = args.dataset + "/%s" % (imageName)
                    #print os.path.join('images-to-index',imageName)
                    #result.append(cv2.imread(os.path.join(args.dataset,imageName)))
                    result.append(cv2.imread(imageName))
                    f.write(imageName+'\n')
                    dist.append(score)
                    #print "\t%d. %s : %.3f" % (j + 1, imageName, score)
                    #print score
                plot(result,dist,flag,args.save)
                flag += 1
            except Exception as e:
                print 'error with', image_path, ':', e
    f.close()
    print 'Total time: ', time.time() - start                 
Example #11
    def colorSearch(self, max_matches=5):
        '''
        Searches query image against index and returns the specified number of matches.
        Results are in the format (chi-squared distance, image name).
        '''
        self.index = self.createIndex()

        image = cv2.imread(self.image)
        print("Querying: " + self.image + " ...")
        searcher = Searcher(self.index)
        queryFeatures = self.createHistogram(image)

        results = searcher.search(queryFeatures)[:max_matches]

        print("Matches found:")
        for j, (score, imageName) in enumerate(results):
            print("\t%d. %s : %.3f" % (j + 1, imageName, score))

        return results
Example #12
    def searchByColor(self):
        '''
        Searches query image against index and returns the specified number of matches.
        '''

        MAX_NUMBER_MATCHES = 5

        image = cv2.imread(self.image)
        print("Querying: " + self.image + " ...")
        searcher = Searcher(self.index)
        queryFeatures = self.createHistogram(image)

        results = searcher.search(queryFeatures)[:MAX_NUMBER_MATCHES]

        print("Matches found:")
        for j, (score, imageName) in enumerate(results):
            print("\t%d. %s : %.3f" % (j + 1, imageName, score))

        return results
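The Searcher used by these color-search methods is project code; as a rough illustration of the behaviour the docstrings describe (an index of {image name: color histogram} ranked by chi-squared distance), here is a minimal stand-in — the class name and internals are assumptions:

import numpy as np

class ChiSquaredSearcher(object):
    '''Illustrative stand-in for the Searcher used above.'''
    def __init__(self, index):
        self.index = index  # {image name: flattened color histogram}

    def chi2_distance(self, histA, histB, eps=1e-10):
        # chi-squared distance between two histograms
        return 0.5 * np.sum(((histA - histB) ** 2) / (histA + histB + eps))

    def search(self, queryFeatures):
        results = [(self.chi2_distance(features, queryFeatures), name)
                   for (name, features) in self.index.items()]
        return sorted(results)  # ascending distance: best matches first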
Example #13
File: api.py Project: PierreHao/QScode
class ImageSearcher():
    '''Image searcher API for clothes retrieval web demo'''
    def __init__(self):
        root_path = os.path.dirname(__file__)
        inds_path = os.path.abspath(os.path.join(root_path, 'db/index'))
        feature_path = os.path.abspath(os.path.join(root_path, 'db/feature.npy'))
        self.searcher = Searcher(inds_path, feature_path)
        self.local_features = np.load('db/local_features.npy')

    def search(self, image_path, do_detection=1, k=10): 
        #queryImage = cv2.imread(image_path)
        t1 = Timer()
        t1.tic()
        #queryFeatures = descriptor.get_descriptor(image_path, multi_box=False)
        queryFeatures = descriptor.get_descriptor(image_path)
        
        t1.toc('Feature Extraction time: ')
        t2 = Timer()
        t2.tic()
        #p = Profile()
        #results = p.runcall(self.searcher.search, queryFeatures)
        #p.print_stats()
        results, dists, ind = self.searcher.search(queryFeatures,k=5*k)
        #self.reranking(queryFeatures, results, dists, ind, 0.6)
        #self.queryExpansion2(results, dists, ind)
        #self.queryExpansion(queryFeatures, results, dists, ind, top=3)
        t2.toc('Knn search time: ')
        result = []
        # origine image
        #result.append(image_path)
        dist = []
        for j,imageName in enumerate(results):
            if imageName not in result:
                result.append(imageName)
                dist.append(dists[j])
        #print result[:k]
        return result[:k],dist[:k]

    def reranking(self, queryFeatures, results, dists, ind, rerank_thresh=0.7):
        features = self.local_features
        feature = []
        flag = 0
        dist = 0
        res = []
        for i,index in enumerate(ind):
            if dists[i] < rerank_thresh:
                flag += 1
            else:
                if dist == 0:
                    dist = dists[i-1]
                feature.append(features[index])
                res.append(results[i])
        if len(feature) < 3:
            return
        feature = np.array(feature).copy()
        result,new_ind = self.searcher.research(res, queryFeatures, feature, 3)
        for j,r in enumerate(result):
            results.insert(flag+j, r)
            dists.insert(flag+j, dist)

    def queryExpansion2(self, results, dists, ind, threshold=0.3, k=10, top=3):
        features = self.searcher.features
        for i in xrange(top):
            query = features[ind[i]]
            if dists[i] > threshold:
                break
            new_result, new_dist, new_ind = self.searcher.search(query, k=k)
            for j, dist in enumerate(new_dist):
                if dist > threshold:
                    break
                # the inner index must not be named `k`, which would clobber
                # the `k` parameter used by self.searcher.search above
                for m, d in enumerate(dists[i:]):
                    if dist < d:
                        results.insert(i + m, new_result[j])
                        dists.insert(i + m, dist)
                        break

    def queryExpansion(self, queryFeatures, results, dists, ind, threshold=0.8, k=10, top=5):
        """
        Do query expansion using at most `top` nearest neighbours
        """
        features = self.searcher.features
        feature = []
        #feature.append(queryFeatures)
        for i,dist in enumerate(dists):
            #if dist < threshold and i < top:
            if i < top:
                feature.append(features[ind[i]])
        if len(feature) == 0:
            return 0
        query = np.mean(np.array(feature), axis=0)
        new_results, new_dists, new_ind = self.searcher.search(query,k=k)
        for i,dist in enumerate(new_dists):
            if dist > dists[-1]:
                break
            for j,d in enumerate(dists):
                if dist < d:
                    results.insert(j, new_results[i])
                    dists.insert(j, dist)
                    break 
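Both expansion helpers implement forms of query expansion over the k-NN index: queryExpansion averages the descriptors of the top-ranked results into a single new query (np.mean(..., axis=0)) and re-searches, while queryExpansion2 re-queries with each sufficiently close neighbour individually; in both cases the newly found neighbours are merge-inserted into the distance-ordered result lists in place.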
Example #14
ap.add_argument("-d", "--dataset", required=True,
                help="Path to indexed image dataset")
ap.add_argument("-i", "--index", required=True,
                help="Path to index file")

args = vars(ap.parse_args())

# Load the index and initialize our searcher
index = cPickle.loads(open(args["index"]).read())
searcher = Searcher(index)

# loop over images in the index -- we will use each one as
# a query image
for (query, queryfeatures) in index.items():
    # perform the search using the current query
    results = searcher.search(queryfeatures)

    # load the query image and display it
    path = args["dataset"] + "/%s" %(query)
    queryImage = cv2.imread(path)
    cv2.imshow("Query", queryImage)
    print "query: %s" %(query)

    # initialize the 2 montages to display our results --
    # we have a total of 25 images in the index, but let's only
    # display the top 10 results; 5 images per montage, with
    # images that are 400x166 pixels

    montageA = np.zeros((166*5, 400, 3), dtype="uint8")
    montageB = np.zeros((166*5, 400, 3), dtype="uint8")
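The snippet ends before the montages are filled; a plausible continuation, assuming results holds (score, imageName) pairs as in the other demos and resizing each result to 400x166 — none of this loop body appears in the original:

    # Hypothetical continuation: show the top 10 results, 5 per montage.
    for j, (score, imageName) in enumerate(results[:10]):
        image = cv2.resize(cv2.imread(args["dataset"] + "/%s" % (imageName)),
                           (400, 166))
        if j < 5:
            montageA[j * 166:(j + 1) * 166, :] = image
        else:
            montageB[(j - 5) * 166:(j - 4) * 166, :] = image

    cv2.imshow("Results 1-5", montageA)
    cv2.imshow("Results 6-10", montageB)
    cv2.waitKey(0)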
Example #15
from os import listdir
from search import Searcher
searcher = Searcher()
fields = {'id': False, 'title': True, 'content': True, 'out': False}

docs = []
for fname in listdir('./data/folders/1001 bí ẩn/'):
    item = {
        'id': len(docs) + 1,
        'title': fname[:-4],
        'content': open('./data/folders/1001 bí ẩn/' + fname).read(),
        'out': fname[:-4]
    }
    docs.append(item)

searcher.set_fields(fields)
searcher.fit(docs[:10])
for i in range(10):
    print(docs[i]['title'])
i = 10
while True:
    s = input('Ready to test: ')
    if s == 'add':
        print(docs[i]['title'])
        searcher.add_document(docs[i])
        i += 1
    else:
        searcher.search(s)  # note: the return value is discarded here
Example #16
def api_search():
    query = request.args['query']
    field = request.args['field']
    searcher = Searcher()
    result = searcher.search(query, field)
    return jsonify(result)
Example #17
def main_1(var):

    num_groups = int(var[0])

    num_clusters = int(var[1])

    if var[2] >= 50:
        dist_function_name = 'euclidean'
    else:
        dist_function_name = 'cosine'
    threshold = var[3]

    server_url = 'localhost:9200'
    num_queries = 200

    with open('evaluation_set.json') as f:
        evaluation_set = json.load(f)

    training_embedding_vectors = np.load("PCA_2048_to_512_new.npy")
    query_vector_indices = random.sample(range(len(evaluation_set.keys())),
                                         num_queries)
    train_labels, image_names = get_image_data(
        'vn_celeb_face_recognition/train.csv')

    # print("working on {} groups, {} clusters, {} threshold".format(num_groups, num_clusters, threshold))
    search_times = []
    mean_average_accuracy = 0
    mean_recall = 0

    for query_vector_index in query_vector_indices:

        query_vector = training_embedding_vectors[evaluation_set[str(
            query_vector_index)][0]]
        # print(query_vector)
        actual_query_label = train_labels[evaluation_set[str(
            query_vector_index)][0]]
        num_actual_results = len(evaluation_set[str(actual_query_label)])
        # print(actual_query_label)
        # print("------------")

        es = Elasticsearch(server_url)
        index_name = 'face_off_' + str(num_groups) + 'groups_' + str(
            num_clusters) + 'clusters_vgg'
        # if the data is not indexed yet, create the index, load the data
        # into ES, and then query
        if not es.indices.exists(index_name):
            data_encoder = DataEncoder(num_groups, num_clusters, 1000,
                                       training_embedding_vectors,
                                       'encode_results_vgg')
            data_encoder.run_encode_data()
            json_string_tokens_generator = JsonStringTokenGenerator(
                'encode_results_vgg', 'PCA_2048_to_512_new.npy',
                'vn_celeb_face_recognition/train.csv', num_groups,
                num_clusters)
            encoded_string_tokens_list = json_string_tokens_generator.get_string_tokens_list(
            )
            train_embs = json_string_tokens_generator.get_image_fetures()
            train_labels, image_names = json_string_tokens_generator.get_image_metadata(
            )
            json_string_tokens_list = json_string_tokens_generator.generate_json_string_tokens_list(
                encoded_string_tokens_list, train_labels, image_names,
                train_embs)
            json_string_tokens_generator.save_json_string_tokens(
                json_string_tokens_list)

            print('saving completed....')
            print('******************************')
            indexer = ESIndexer('encode_results_vgg', num_groups, num_clusters,
                                server_url, 'vgg')
            indexer.index()

            start_time = datetime.now()
            searcher = Searcher(threshold, num_groups, num_clusters,
                                query_vector, server_url, index_name,
                                dist_function_name, 'vgg')
            results = searcher.search()
            # print(len(results))
            if len(results) == 0: continue
            search_time = datetime.now() - start_time
            search_time_in_ms = (search_time.days * 24 * 60 * 60 +
                                 search_time.seconds) * 1000 + \
                                search_time.microseconds / 1000.0
            search_times.append(search_time_in_ms)
        else:  # the index already exists, so just run the query
            start_time = datetime.now()
            searcher = Searcher(threshold, num_groups, num_clusters,
                                query_vector, server_url, index_name,
                                dist_function_name, 'vgg')
            results = searcher.search()
            # print(len(results))
            if len(results) == 0: continue
            search_time = datetime.now() - start_time
            search_time_in_ms = (search_time.days * 24 * 60 * 60 +
                                 search_time.seconds) * 1000 + \
                                search_time.microseconds / 1000.0
            search_times.append(search_time_in_ms)

        results_labels = list()
        for result in results:
            results_labels.append(result['id'])

        # with open('evaluation_set.json', 'r') as fh:
        #     evaluation_set_dict = json.load(fh)
        #     fh.close()

        accuracy_i = 0
        for i in range(len(results)):
            step_list = results_labels[:(i + 1)]
            # count correct labels in the top (i + 1) results; avoid reusing
            # the loop variable name inside the comprehension
            num_corrects = len([x for x in step_list if x == actual_query_label])
            accuracy_i += num_corrects / len(step_list)
        # print(accuracy_i/num_returns)
        mean_average_accuracy += accuracy_i / len(results)

        recall_i = num_corrects / num_actual_results
        # print(num_corrects)
        mean_recall += recall_i

        # print("*************************************")

    mean_average_accuracy = mean_average_accuracy / num_queries
    mean_recall = mean_recall / num_queries
    print(mean_average_accuracy, mean_recall)
    # print("precision: {} and recall: {}".format(mean_average_accuracy, mean_recall))
    # print(average_search_time)
    # print(mean_average_accuracy)

    return 3 - mean_average_accuracy - mean_recall - (
        2 * mean_average_accuracy * mean_recall /
        (mean_average_accuracy + mean_recall))
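The returned value is 3 - MAP - recall - F1, where MAP is the mean_average_accuracy above and F1 = 2*MAP*recall / (MAP + recall) is their harmonic mean; each term is at most 1, so the expression reaches 0 only for perfect retrieval and reads as a minimization objective for whatever optimizer supplies var.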
Example #18
#coding:utf-8
from index import Indexer
from search import Searcher

if __name__ == '__main__':
    index = Indexer("docs.txt")
    searcher = Searcher(index)

    i = 0
    while True:
        i += 1

        input = raw_input(str(i) + ".请输入问题:")  # prompt: "please enter a question"
        doclist = searcher.search(input.decode('utf-8'))

        if len(doclist) > 0:
            for doc in doclist:
                print doc.id, doc.name, doc.text
        else:
            print "无相关结果"
        print "\n"
Example #19
ap = argparse.ArgumentParser()
ap.add_argument("-q", "--query", required=True, help="Path to input image")
arg = vars(ap.parse_args())

with open('dictionary.txt') as f:
    dataset = cPickle.loads(f.read())

queryImage = cv2.imread(arg["query"])
cv2.imshow("QueryImage", queryImage)
print "Query :: %s" % (arg["query"][arg["query"].rfind('/') + 1:])

rgbHist = RGBHist([8, 8, 8])
queryHist = rgbHist.getHist(queryImage)

searcher = Searcher(dataset)
results = searcher.search(queryHist)

set1 = np.zeros((150 * 5, 400, 3), dtype='uint8')
set2 = np.zeros((150 * 5, 400, 3), dtype='uint8')

for i in xrange(0, 10):
    (fileName, dist) = results[i]
    print "Result %d :: %s, Score :: %f" % (i, fileName, dist)
    path = './dataset/' + fileName
    image = cv2.imread(path)
    # note: each montage slot is 150x400, so dataset images are assumed to
    # already be that size (otherwise resize before assigning)
    if i < 5:
        set1[150 * i:150 * (i + 1), :, :] = image
    else:
        set2[150 * (i - 5):150 * (i - 4), :, :] = image

cv2.imshow("SearchResults 1-5", set1)
Example #20
def main():
    server_url = 'localhost:9200'
    num_queries = 1000

    with open('hyper_params_set.json', 'r') as fh:
        hyper_params = json.load(fh)
        nums_groups = hyper_params['nums_groups']
        nums_clusters = hyper_params['nums_clusters']
        thresholds = hyper_params['thresholds']

    with open('evaluation_set.json') as f:
        evaluation_set = json.load(f)

    final_results = []

    training_embedding_vectors = np.load("train_embs_VGGFace.npy")
    query_vector_indices = random.sample(range(len(evaluation_set.keys())),
                                         num_queries)
    train_labels, image_names = get_image_data(
        'vn_celeb_face_recognition/train.csv')

    for threshold in thresholds:
        for num_groups in nums_groups:
            for num_clusters in nums_clusters:

                print("working on {} groups, {} clusters, {} threshold".format(
                    num_groups, num_clusters, threshold))
                search_times = []
                mean_average_accuracy = 0
                mean_recall = 0
                for query_vector_index in query_vector_indices:

                    query_vector = training_embedding_vectors[evaluation_set[
                        str(query_vector_index)][0]]
                    actual_query_label = train_labels[evaluation_set[str(
                        query_vector_index)][0]]
                    num_actual_results = len(
                        evaluation_set[str(actual_query_label)])
                    # print(actual_query_label)
                    # print("------------")

                    es = Elasticsearch(server_url)
                    index_name = 'face_off_' + str(
                        num_groups) + 'groups_' + str(
                            num_clusters) + 'clusters_vgg'
                    # if the data is not indexed yet, create the index, load
                    # the data into ES, and then query
                    if not es.indices.exists(index_name):
                        indexer = ESIndexer('encode_results_vgg', num_groups,
                                            num_clusters, server_url, 'vgg')
                        indexer.index()

                        start_time = datetime.now()
                        searcher = Searcher(threshold, num_groups,
                                            num_clusters, query_vector,
                                            server_url, index_name, 'cosine',
                                            'vgg')
                        results = searcher.search()
                        # print(len(results))
                        if len(results) == 0: continue
                        search_time = datetime.now() - start_time
                        search_time_in_ms = (search_time.days * 24 * 60 * 60 +
                                             search_time.seconds) * 1000 + \
                                             search_time.microseconds / 1000.0
                        search_times.append(search_time_in_ms)
                    else:  # the index already exists, so just run the query
                        start_time = datetime.now()
                        searcher = Searcher(threshold, num_groups,
                                            num_clusters, query_vector,
                                            server_url, index_name, 'cosine',
                                            'vgg')
                        results = searcher.search()
                        # print(len(results))
                        if len(results) == 0: continue
                        search_time = datetime.now() - start_time
                        search_time_in_ms = (search_time.days * 24 * 60 * 60 +
                                             search_time.seconds) * 1000 + \
                                            search_time.microseconds / 1000.0
                        search_times.append(search_time_in_ms)

                    # print(len(results))
                    results_labels = list()
                    for result in results:
                        # print(result['id'])
                        results_labels.append(result['id'])

                    # with open('evaluation_set.json', 'r') as fh:
                    #     evaluation_set_dict = json.load(fh)
                    #     fh.close()

                    accuracy_i = 0
                    for i in range(len(results)):
                        step_list = results_labels[:(i + 1)]
                        # count correct labels in the top (i + 1) results
                        num_corrects = len(
                            [x for x in step_list if x == actual_query_label])
                        accuracy_i += num_corrects / len(step_list)
                    # print(accuracy_i/num_returns)
                    mean_average_accuracy += accuracy_i / len(results)

                    recall_i = num_corrects / num_actual_results
                    # print(num_corrects)
                    mean_recall += recall_i

                    # print("*************************************")
                average_search_time = round(
                    np.mean(np.asarray(search_times)) / 1000, 3)
                mean_average_accuracy = mean_average_accuracy / num_queries
                mean_recall = mean_recall / num_queries
                # print(average_search_time)
                # print(accuracy)

                final_results.append([
                    num_groups, num_clusters, threshold, num_queries,
                    'cosine',  # the distance function actually used above
                    average_search_time,
                    round(mean_average_accuracy, 4),
                    round(mean_recall, 4)
                ])
                print([
                    num_groups, num_clusters, threshold, num_queries,
                    'cosine', average_search_time,
                    round(mean_average_accuracy, 4),
                    round(mean_recall, 4)
                ])

                print("finish")
                print("-----------------------------------------------")
Example #21
File: api.py Project: PierreHao/QScode
class ImageSearcher():
    '''Image searcher API for clothes retrieval web demo'''
    def __init__(self):
        root_path = os.path.dirname(__file__)
        tree_path = os.path.abspath(os.path.join(root_path, 'db/tree5'))
        inds_path = os.path.abspath(os.path.join(root_path, 'db/index'))
        feature_path = os.path.abspath(os.path.join(root_path, 'db/feature.npy'))
        self.searcher = Searcher(tree_path, inds_path, feature_path)
        #self.dataset = os.path.abspath(os.path.join(root_path, '../CBIR/datasets'))
        label_path = os.path.abspath(os.path.join(root_path, 'db/label.pkl'))
        self.label = cPickle.loads(open(label_path).read())

    def search(self, image_path, do_detection=1, k=50): 
        #queryImage = cv2.imread(image_path)
        t1 = Timer()
        t1.tic()
        #queryFeatures = descriptor.get_descriptor(image_path, multi_box=False)
        queryFeatures, label = descriptor.get_descriptor(image_path,
                                                         multi_box=False,
                                                         get_label=True,
                                                         do_detection=do_detection)
        flag = []  # left empty: class-based filtering of results is disabled
        t1.toc('Feature Extraction time: ')
        t2 = Timer()
        t2.tic()
        #p = Profile()
        #results = p.runcall(self.searcher.search, queryFeatures)
        #p.print_stats()
        results, dists = self.searcher.search(queryFeatures)
        print dists
        t2.toc('Knn search time: ')
        result = []
        # original image
        #result.append(image_path)
        if len(flag) != 0:
            for j in xrange(0, k):
                imageName = results[j]
                if imageName not in result:    
                    # keep the image when its class disagrees but similarity is very high
                    if dists[j] < 0.05:
                        result.append(imageName)
                        continue
                    #if dists[j] > 0.2:
                    #    break
                    # check whether the image belongs to the class
                    image_path = imageName.split('/')
                    image_dir = image_path[0]+'/'+image_path[1]+'/'+image_path[2]
                    #print image_dir
                    if image_dir in flag:
                        result.append(imageName)
                    #else:
                    #    result.append(imageName)
        print 'total result', len(result)
        if len(result) < 3:
            # if the class-filtered result has fewer than 3 images, search all datasets
            #print 'total result', len(result)
            k = 30
            result = []
            for j in xrange(0, k):
                imageName = results[j]
                if imageName not in result:
                    #if dists[j] > 0.2:
                    #    break
                    result.append(imageName)
        
        return result 
Example #22
File: hw4.py Project: gbehrendt/Python
from cspProblemDefine import CSP, Constraint, ne_, is_ 
from operator import lt,ne,eq,gt
from search import Search_from_CSP, Searcher

def meet_at(p1,p2):
    """returns a function that is true when the words meet at the postions p1, p2
    """
    def meets(w1,w2):
        return w1[p1] == w2[p2]
    meets.__name__ = "meet_at("+str(p1)+','+str(p2)+')'
    return meets

crossword1 = CSP({'one_across':{'ant', 'bus', 'car', 'has'},
                  'one_down':{'buys', 'hold', 'lane', 'year'},
                  'three_across':{'buys', 'hold', 'lane', 'year'},
                  'two_down':{'search', 'syntax'},
                  'four_across':{'ant', 'bus', 'car', 'has'}
                  },
                  [Constraint(('one_across','one_down'),meet_at(0,0)),
                   Constraint(('one_down','three_across'),meet_at(2,0)),
                   Constraint(('one_across','two_down'),meet_at(2,0)),
                   Constraint(('three_across','two_down'),meet_at(2,2)),
                   Constraint(('four_across','two_down'),meet_at(0,4))
                   ])
    
searcher3 = Searcher(Search_from_CSP(crossword1))
print('The first solution searched is:')
print(searcher3.search())
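Assuming Searcher is the generic frontier-based searcher from an AIPython-style search module (suggested by the Search_from_CSP wrapper), each call to search() resumes from the remaining frontier, so further solutions can be requested; a hedged sketch:

# Hypothetical follow-up, assuming search() resumes where it left off and
# returns None (or similar) once the search space is exhausted.
second = searcher3.search()
if second is not None:
    print('The next solution searched is:')
    print(second)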