def sample(self, image, cls1, cls2, feature_object):
    """Create (or load from a per-image cache) one feature sample.

    Args:
        image: path to the image file; the basename without extension
            becomes the sample id.
        cls1, cls2: category labels stored alongside the histogram.
        feature_object: object exposing ``.name`` (used in the cache key)
            and ``.fire(image)`` (computes the feature histogram).

    Returns:
        A one-element list ``[{'img', 'cls1', 'cls2', 'hist'}]``.
    """
    # sample id = file basename without extension
    idx = image.split('/')[-1]
    idx = idx.split('.')[0]
    sample_cache = '{}-{}-{}-{}'.format(feature_object.name + '-based', idx,
                                        cls1, cls2)
    cache_path = os.path.join(Config.engine_cache_path() + '/single',
                              sample_cache)
    try:
        # `with` guarantees the handle is closed (the original leaked it)
        with open(cache_path, "rb") as fp:
            return cPickle.load(fp)
    except Exception:
        # cache miss or unreadable cache entry — recompute below
        pass
    histogram = feature_object.fire(image)
    samples = [{
        'img': idx,
        'cls1': cls1,
        'cls2': cls2,
        'hist': histogram
    }]
    with open(cache_path, "wb") as fp:
        cPickle.dump(samples, fp)
    return samples
def getImagePaths(self, create=True):
    """Build a ``{category_id: relative/image/path}`` map from `categories`.

    Child categories (``category_id != 0``) prepend their parent's path;
    rows are ordered by id so parents are always resolved first.

    Args:
        create: when True, also create the image directory for each
            category on disk and skip the cache read.

    Returns:
        dict on success, ``False`` on any database error (legacy contract
        kept for existing callers).
    """
    cache = SimpleCache()
    if not create:
        rv = cache.get('image-paths')
        if rv is not None:
            return rv
    try:
        # NOTE(review): passes the class itself as `self` — looks odd but
        # matches existing usage; confirm before changing.
        with MySQL.conn(MySQL) as cursor:
            categories = {}
            sql = "SELECT id, category_id, alias FROM `categories` WHERE id > 0 ORDER BY id ASC"
            cursor.execute(sql)
            for row in cursor:
                idx = row['id']
                category_id = row['category_id']
                alias = row['alias']
                if category_id == 0:
                    # top-level category: path is just its alias
                    categories[idx] = alias
                else:
                    # child category: parent path + '/' + alias
                    categories[idx] = categories[category_id] + '/' + alias
                if create:
                    path = Config.product_image_path() + '/' + categories[idx]
                    if not os.path.exists(path):
                        os.makedirs(path)
        # cache for ~1 year; refreshed whenever create=True runs again
        cache.set('image-paths', categories, timeout=60 * 60 * 24 * 365)
        return categories
    except Exception:
        # preserve the legacy failure contract: any error yields False
        return False
def conn(self):
    """Return a cursor on the lazily-created MySQL connection.

    The underlying connection is opened once on first use and reused on
    every subsequent call; each call hands back a fresh DictCursor.
    """
    if self.connection:
        return self.connection.cursor()
    # first call: open the connection using the configured credentials
    self.connection = pymysql.connect(
        host=Config.databaseHost(),
        unix_socket=Config.databaseSocket(),
        user=Config.databaseUser(),
        passwd=Config.databasePassword(),
        db=Config.database(),
        autocommit=True,
        cursorclass=pymysql.cursors.DictCursor)
    return self.connection.cursor()
def samples(self, feature_object):
    """Create (or load from cache) feature samples for the whole collection.

    Iterates every row of the image collection, builds the image path from
    its category columns, and delegates per-image work to ``self.sample``.

    Args:
        feature_object: object exposing ``.name`` and ``.fire(image)``.

    Returns:
        A list whose elements are the one-element sample lists produced by
        ``self.sample()``.
    """
    db = ImageCollection()
    sample_cache = "{}-{}".format(feature_object.name + '-based',
                                  'all-products')
    # unified path construction — the original loaded from
    # os.path.join(...) but saved to base + '/' + name
    cache_path = os.path.join(Config.engine_cache_path(), sample_cache)
    try:
        with open(cache_path, "rb") as fp:
            return cPickle.load(fp)
    except Exception:
        # cache miss or unreadable entry — rebuild below
        pass
    samples = []
    i = 0
    data = db.get_collection()
    for d in data.itertuples():
        try:
            cls1, cls2, img = d.cls1, d.cls2, d.img
            # assemble the image path, skipping empty category segments
            image = Config.product_image_path()
            if len(cls1) > 0:
                image = image + '/' + cls1
            if len(cls2) > 0:
                image = image + '/' + cls2
            image = image + '/' + img
            samples.append(self.sample(image, cls1, cls2, feature_object))
            i = i + 1
            print(i)  # progress counter of successfully sampled rows
        except Exception:
            # one broken row must not abort the whole build
            continue
    # NOTE(review): written with dill but read back with cPickle above —
    # compatible for these plain dict/list payloads; confirm before
    # storing anything exotic.
    with open(cache_path, 'wb') as fp:
        dill.dump(samples, fp)
    return samples
def find(self, img, cls1, cls2, feature_object):
    """Find images similar to ``img``, ordered by ascending distance.

    Builds the query sample, compares it against every cached collection
    sample via ``self.distance``, and optionally truncates the sorted
    result to the configured compare depth.
    """
    depth = Config.compareDepth()
    feature = Feature()
    query = feature.sample(img, cls1, cls2, feature_object)[0]
    # each cached entry is a one-element list; unwrap and score it
    result = [{
        'idx': entry[0]['img'],
        'cls1': entry[0]['cls1'],
        'cls2': entry[0]['cls2'],
        'dis': self.distance(query['hist'], entry[0]['hist'])
    } for entry in feature.samples(feature_object)]
    result.sort(key=lambda item: item['dis'])
    # keep only the closest `depth` matches when a depth is configured
    if depth and depth <= len(result):
        result = result[:depth]
    return result
def category(self, category, url, page, image_paths, mysql):
    """Fetch all products on one Trendyol listing page and insert them.

    Args:
        category: top-level section name ('women'/'men'/'baby'/...) used
            as a fallback when title-based categorization fails.
        url: listing-page URL to scrape.
        page: page number (logging only).
        image_paths: {category_id: relative image dir} map.
        mysql: database helper exposing insertProduct().
    """
    print('Page: ' + str(page))
    print()
    print()
    print()
    curl = Curl()
    body = curl.fetch(url)
    soup = BeautifulSoup(body, "html.parser")
    products = soup.findAll('li', attrs={'class': 'product-card-wrapper'})
    for product in products:
        # any parse failure on a single card skips just that card
        try:
            soup = BeautifulSoup(product.encode('utf-8'), "html.parser")
            # title: image title attribute, truncated at the '/////' marker
            title = soup.find('img', attrs={'class': 'product-image'})
            title = title.get('title').encode('utf-8').strip()
            title = title.decode('utf-8').split('/////')
            title = title[0]
            # link: product detail page href
            link = soup.find('a', attrs={'class': 'product-detail-link'})
            link = link.get('href').encode('utf-8').strip()
            link = link.decode('utf-8')
            # photo: prefer src; fall back to lazy-load data-original when
            # src is the placeholder thumb
            photo = soup.find('img', attrs={'class': 'product-image'})
            photo = photo.get('src').encode('utf-8').strip()
            if photo.decode('utf-8') == '/Content/images/defaultThumb.jpg':
                photo = soup.find('img', attrs={'class': 'product-image'})
                photo = photo.get('data-original').encode('utf-8').strip()
            # rewrite to the CDN's 200x200 resize endpoint
            photo = photo.decode('utf-8').split(
                "https://img-trendyol.mncdn.com/")
            photo = "https://img-trendyol.mncdn.com/mnresize/200/200/" + photo[1]
            # old price: strip " TL", thousands dots, and turn the
            # decimal comma into a dot; 0 when absent
            try:
                old_price = soup.find(
                    'span', attrs={'class': 'product-market-price'})
                old_price = old_price.text.encode('utf-8').strip() \
                    .decode('utf8').replace(" TL", "").replace(".", "").replace(",", ".")
            except:
                old_price = 0
            # new price: same normalization; 0 when absent
            try:
                new_price = soup.find(
                    'span', attrs={'class': 'product-sale-price'})
                new_price = new_price.text.encode('utf-8').strip().decode('utf8') \
                    .replace(" TL", "").replace(".", "").replace(",", ".")
            except:
                new_price = 0
            # discount percentage from the discount box; 0 when absent
            try:
                discount = soup.find('div', attrs={'class': 'discountBox'})
                soup = BeautifulSoup(discount.encode('utf-8'), "html.parser")
                discount = soup.find('span')
                discount = discount.text.encode('utf-8').strip().decode(
                    'utf8').replace(" TL", "").replace("%", "")
            except:
                discount = 0
            # currency is fixed for this site
            currency = 'TRY'
            # category: classify by title, else fall back to a hard-coded
            # id per top-level section
            category_id = self.product(title)
            if category_id <= 0:
                if category == 'women':
                    category_id = 11
                elif category == 'men':
                    category_id = 16
                elif category == 'baby':
                    category_id = 32
                else:
                    category_id = 27
            # image path for this category
            image_path = image_paths[category_id]
            # insert; insertProduct presumably returns the new row id or a
            # falsy value on duplicate/failure — confirm against MySQL helper
            product_id = mysql.insertProduct(self.site_id, category_id,
                                             title, new_price, old_price,
                                             discount, currency, link)
            if product_id:
                # download the product image named after the new row id
                curl.download(
                    photo, Config.product_image_path() + '/' + image_path +
                    '/' + str(product_id) + '.jpg')
                print('Added: ' + str(product_id))
                print('Category: ' + str(category_id))
                print('Image Path: ' + str(image_path))
                if os.path.exists(Config.product_image_path() + '/' +
                                  image_path + '/' + str(product_id) +
                                  '.jpg'):
                    print('Image is added.')
                else:
                    print('Image is not added.')
                print('Url: ' + link)
                print()
                print()
                print()
            # throttle between products
            time.sleep(self.sleep)
        except:
            continue
def category(self, url, page, image_paths, mysql):
    """Fetch all products on one Hepsiburada listing page and insert them.

    Args:
        url: listing-page URL to scrape.
        page: page number (logging only).
        image_paths: {category_id: relative image dir} map.
        mysql: database helper exposing insertProduct().
    """
    print('Page: ' + str(page))
    print()
    print()
    print()
    curl = Curl()
    body = curl.fetch(url)
    soup = BeautifulSoup(body, "html.parser")
    products = soup.findAll('li', attrs={'class': 'search-item'})
    for product in products:
        # any parse failure on a single card skips just that card
        try:
            soup = BeautifulSoup(product.encode('utf-8'), "html.parser")
            # title: h3 title attribute, truncated at the '/////' marker
            title = soup.find('h3', attrs={'class': 'product-title'})
            title = title.get('title').encode('utf-8').strip()
            title = title.decode('utf-8').split('/////')
            title = title[0]
            # link: first anchor's href
            link = soup.find('a')
            link = link.get('href').encode('utf-8').strip()
            link = link.decode('utf-8')
            # photo: rebuild a 200px CDN URL from path segments 4 and 6
            # of the original image URL
            photo = soup.find('img', attrs={'class': 'product-image'})
            photo = photo.get('src').encode('utf-8').strip()
            photo = photo.decode('utf-8').split("/")
            photo = "https://productimages.hepsiburada.net/s/" + photo[
                4] + "/200/" + photo[6]
            # old price: strip " TL", thousands dots, decimal comma -> dot
            try:
                old_price = soup.find('del',
                                      attrs={'class': 'product-old-price'})
                old_price = old_price.text.encode('utf-8').strip() \
                    .decode('utf8').replace(" TL", "").replace(".", "").replace(",", ".")
            except:
                old_price = 0
            # new price: NOTE(review) the primary selector is
            # 'product-old-price' and the fallback is 'product-price' —
            # looks inverted, but may be intentional for discounted items;
            # confirm against live markup before changing. The fallback
            # itself can raise and abort this product (caught by the
            # outer except).
            try:
                new_price = soup.find('span',
                                      attrs={'class': 'product-old-price'})
                new_price = new_price.text.encode('utf-8').strip().decode('utf8') \
                    .replace(" TL", "").replace(".", "").replace(",", ".")
            except:
                new_price = soup.find('span',
                                      attrs={'class': 'product-price'})
                new_price = new_price.text.encode('utf-8').strip().decode('utf8') \
                    .replace(" TL", "").replace(".", "").replace(",", ".")
            # discount badge text; 0 when absent
            try:
                discount = soup.find('div',
                                     attrs={'class': 'discount-badge'})
                soup = BeautifulSoup(discount.encode('utf-8'), "html.parser")
                discount = soup.find('span')
                discount = discount.text.encode('utf-8').strip().decode(
                    'utf8').replace(" TL", "")
            except:
                discount = 0
            # currency is fixed for this site
            currency = 'TRY'
            # category: classify by link; skip unclassifiable products
            category_id = self.product(link)
            if category_id <= 0:
                continue
            # image path for this category
            image_path = image_paths[category_id]
            # insert; falsy product_id means no new row was created
            product_id = mysql.insertProduct(self.site_id, category_id,
                                             title, new_price, old_price,
                                             discount, currency, link)
            if product_id:
                # download the product image named after the new row id
                curl.download(
                    photo, Config.product_image_path() + '/' + image_path +
                    '/' + str(product_id) + '.jpg')
                print('Added: ' + str(product_id))
                print('Category: ' + str(category_id))
                print('Image Path: ' + str(image_path))
                if os.path.exists(Config.product_image_path() + '/' +
                                  image_path + '/' + str(product_id) +
                                  '.jpg'):
                    print('Image is added.')
                else:
                    print('Image is not added.')
                print('Url: ' + link)
                print()
                print()
                print()
            # throttle between products (fixed 1s on this site)
            time.sleep(1)
        except:
            continue
def category(self, category, url, page, image_paths, mysql):
    """Fetch all products on one listing page ('pro-product' cards) and
    insert them into the database.

    Args:
        category: top-level section name ('women'/'men'/'baby'/
            'cosmetic'/...) used as a fallback when title-based
            categorization fails.
        url: listing-page URL to scrape.
        page: page number (logging only).
        image_paths: {category_id: relative image dir} map.
        mysql: database helper exposing insertProduct().
    """
    print('Page: ' + str(page))
    print()
    print()
    print()
    curl = Curl()
    body = curl.fetch(url)
    soup = BeautifulSoup(body, "html.parser")
    products = soup.findAll('div', attrs={'class': 'pro-product'})
    for product in products:
        # any parse failure on a single card skips just that card
        try:
            soup = BeautifulSoup(product.encode('utf-8'), "html.parser")
            # title: visible image's alt text, truncated at '/////'
            title = soup.find('img', attrs={'class': 'visible'})
            title = title.get('alt').encode('utf-8').strip()
            title = title.decode('utf-8').split('/////')
            title = title[0]
            # link: product title anchor's href
            link = soup.find('a', attrs={'class': 'pro-product-title'})
            link = link.get('href').encode('utf-8').strip()
            link = link.decode('utf-8')
            # photo: lazy-load URL, swapped to the 200x200 variant
            photo = soup.find('img', attrs={'class': 'visible'})
            photo = photo.get('data-original').encode('utf-8').strip()
            photo = photo.decode('utf-8').replace("480/640", "200/200")
            # old price: strip " TL", thousands dots, decimal comma -> dot
            try:
                old_price = soup.find(
                    'div', attrs={'data-pro-product-info': 'actual_price'})
                old_price = old_price.text.encode('utf-8').strip() \
                    .decode('utf8').replace(" TL", "").replace(".", "").replace(",", ".")
            except:
                old_price = 0
            # new price: same normalization; 0 when absent
            try:
                new_price = soup.find(
                    'div', attrs={'data-pro-product-info': 'sale_price'})
                new_price = new_price.text.encode('utf-8').strip().decode('utf8') \
                    .replace(" TL", "").replace(".", "").replace(",", ".")
            except:
                new_price = 0
            # discount: computed from the two prices (this site has no
            # badge); guarded against division by zero
            try:
                if float(old_price) == 0:
                    discount = 0
                else:
                    discount = round(
                        (1 - (float(new_price) / float(old_price))) * 100)
            except:
                discount = 0
            # currency is fixed for this site
            currency = 'TRY'
            # category: classify by title, else fall back to a hard-coded
            # id per top-level section
            category_id = self.product(title)
            if category_id <= 0:
                if category == 'women':
                    category_id = 11
                elif category == 'men':
                    category_id = 16
                elif category == 'baby':
                    category_id = 32
                elif category == 'cosmetic':
                    category_id = 26
                else:
                    category_id = 27
            # image path for this category
            image_path = image_paths[category_id]
            # insert; falsy product_id means no new row was created
            product_id = mysql.insertProduct(self.site_id, category_id,
                                             title, new_price, old_price,
                                             discount, currency, link)
            if product_id:
                # download the product image named after the new row id
                curl.download(
                    photo, Config.product_image_path() + '/' + image_path +
                    '/' + str(product_id) + '.jpg')
                print('Added: ' + str(product_id))
                print('Category: ' + str(category_id))
                print('Image Path: ' + str(image_path))
                if os.path.exists(Config.product_image_path() + '/' +
                                  image_path + '/' + str(product_id) +
                                  '.jpg'):
                    print('Image is added.')
                else:
                    print('Image is not added.')
                print('Url: ' + link)
                print()
                print()
                print()
            # throttle between products
            time.sleep(self.sleep)
        except:
            continue
def post(self):
    """Search by photo.

    Saves the uploaded image under the given unique id, shrinks it to a
    128px-wide thumbnail, runs the deep-feature image engine against the
    product collection, and returns matching products.

    Returns:
        (payload, 201) on success with ``{'unique_id', 'products'}``;
        ({'success': False}, 400) when the image cannot be processed.
    """
    parser = reqparse.RequestParser(bundle_errors=True)
    parser.add_argument('unique_id',
                        required=True,
                        help="Unique ID is required.")
    parser.add_argument('photo', type=werkzeug.FileStorage,
                        location='files')
    args = parser.parse_args()
    unique_id = args['unique_id']
    # save the uploaded image under the caller-provided unique id
    image_name = Config.search_image_path() + '/' + unique_id + '.jpg'
    if args['photo']:
        photo = args['photo']
        photo.save(image_name)
    # resize to a 128px-wide thumbnail, preserving aspect ratio
    try:
        im = Image.open(image_name)
        width, height = im.size
        new_width = 128
        # int(): Python 3 true division yields a float, and PIL size
        # values must be integers
        new_height = int(new_width * height / width)
        size = new_width, new_height
        im.thumbnail(size, Image.ANTIALIAS)
        im.save(image_name, "JPEG")
    except Exception:
        # unreadable/corrupt upload — reject the request
        return {'success': False}, 400
    # run the image engine against the deep-feature sample cache
    compare = Compare()
    result = compare.find(image_name, 'search', '', DeepFeature())
    # each match id is an image path; the file stem is the product id
    product_id_list = []
    for image in result:
        product_image = image['idx'].split('/')
        product_id = product_image[-1].split('.')[0]
        product_id_list.append(product_id)
    mysql = MySQL()
    image_paths = mysql.getImagePaths(False)
    products = mysql.getProducts(product_id_list)
    response = []
    for row in products:
        response.append({
            'url': row['ecommerce_url'] + '/' + row['link'],
            'image': Config.base_url() + '/' + Config.product_image_path() +
                     '/' + image_paths[row['category_id']] + '/' +
                     str(row['id']) + '.jpg',
            'name': mysql.latinToUnicode(row['name']),
            'price': str(row['price']),
            'currency': str(row['currency']),
            'discount': str(row['discount']),
            'merchant': row['ecommerce_name'],
        })
    register = {'unique_id': args['unique_id'], 'products': response}
    return register, 201
def __init__(self):
    """Locate the product images and collection CSV, (re)build the
    collection via set_collection(), then load it as a DataFrame."""
    # root directory of downloaded product images
    self.product_path = Config.product_image_path()
    # CSV file describing the image collection
    self.collection_csv_path = Config.image_collection_path()
    # presumably writes/refreshes the CSV before it is read — confirm in
    # set_collection()
    self.set_collection()
    self.collection = pd.read_csv(self.collection_csv_path)