] images = [] for imgNames in imageNames: images.append(getImageData(imgNames)) alphaIgnoredImages = [] for img in images: imgWithoutAlpha = img[:,:,0:3] # print("imgWithoutAlpha", imgWithoutAlpha.shape) alphaIgnoredImages.append(imgWithoutAlpha) # print("images", images, images[0].shape, images[1].shape, images[2].shape) # print("alphaIgnoredImages", alphaIgnoredImages) reshapedImages = [] for img in alphaIgnoredImages: reshapedImg = img.reshape(1,-1) reshapedImages.append(reshapedImg) print("reshapedImages", reshapedImages, "dimension", reshapedImages[0].shape[1]) lshModel = LSH(noOfHashers=25, noOfHash=10, dimension=reshapedImages[0].shape[1]) for i in range(0, len(reshapedImages)): lshModel.train(reshapedImages[i], { "name": imageNames[i] }) print(lshModel.isSimilar(reshapedImages[0], reshapedImages[1])) print(lshModel.isSimilar(reshapedImages[0], reshapedImages[2])) print(lshModel.isSimilar(reshapedImages[1], reshapedImages[2])) print(lshModel.isSimilar(reshapedImages[2], reshapedImages[3]))
from sklearn.feature_extraction.text import CountVectorizer

from lsh.lsh import LSH
import numpy as np

# Demo: index a few sentences as bag-of-words vectors in an LSH model and
# probe pairwise similarity between them.

texts = [
    'Jack went to the market to buy some fruits',
    'Jane went to the market to buy some fruits today',
    'Robert and his team played hockey today',
]

# Count-vectorize, then reshape to (n_docs, 1, vocab_size) so each X[i] is a
# single 1 x d sample in the shape the LSH model expects.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts).toarray().reshape(len(texts), 1, -1)

lshModel = LSH(noOfHashers=25, noOfHash=3, dimension=X.shape[2])

# Index every document vector together with its source text.
for vector, text in zip(X, texts):
    lshModel.train(vector, {"name": text})

print(lshModel.isSimilar(X[0], X[1]))
print(lshModel.isSimilar(X[0], X[2]))
print(lshModel.isSimilar(X[1], X[2]))