def run(self, product_id, datafeed_id):
    """Re-detect category and gender for products and report the differences.

    Products are selected by ``datafeed_id`` and, when ``product_id`` is
    truthy, additionally by that id. Nothing is saved back to the products;
    the outcome is written to two text files in the current working
    directory (both are truncated on every call):

    * ``recalc_shopstyle_final.txt`` -- one ``id;category_id;gender_id``
      line per product whose stored category/gender differs from the
      freshly detected pair.
    * ``recalc_shopstyle_final_err.txt`` -- one product id per line for
      products where detection did not yield both a category and a gender.

    A progress line is printed after every 10000 products.
    """
    filters = {'datafeed_id': datafeed_id}
    if product_id:
        filters['id'] = product_id

    with open('recalc_shopstyle_final.txt', 'w') as changes_file:
        with open('recalc_shopstyle_final_err.txt', 'w') as errors_file:
            products = Product.filter(**filters).iterator()
            for processed, product in enumerate(products, 1):
                # Detection input: name, raw category path segments,
                # market category segments, and all param values joined
                # into a single phrase.
                phrases = [product.name]
                if product.categories_raw:
                    phrases.extend(product.categories_raw.split('/'))
                if product.market_category:
                    phrases.extend(product.market_category.split('>'))
                phrases.append(' '.join(product.params.values()))

                category_key, gender_key, *_ = detect_category(phrases)
                if not (category_key and gender_key):
                    errors_file.write('{}\n'.format(product.id))

                # Fall back to the placeholder keys for whatever detection
                # could not determine.
                if not category_key:
                    category_key = CATEGORY_UNKNOWN
                if not gender_key:
                    gender_key = GENDER_UNDEFINED
                category_id = CATEGORIES[category_key]
                gender_id = CATEGORIES[gender_key]

                is_up_to_date = (product.category == category_id
                                 and product.gender == gender_id)
                if not is_up_to_date:
                    changes_file.write('{};{};{}\n'.format(
                        product.id, category_id, gender_id))

                if processed % 10000 == 0:
                    print('Processed ', processed)
def datafeed_process(self, params):
    """Import a downloaded datafeed file and record the outcome on the feed.

    The datafeed is looked up by ``params['datafeed_id']``. If the file named
    in ``datafeed.log['download_file']`` exists it is passed to
    ``ImportDatafeed.run``; on success the status becomes
    ``STATUS_COMPLETED`` and import counters are stored in ``datafeed.log``,
    on any import exception the status becomes ``STATUS_ERROR`` and the
    exception text is stored under ``import_error``. If the file is missing
    the status becomes ``STATUS_ERROR`` with a ``process_error`` entry.

    In every case ``imported`` is stamped with the current time, the
    status/log/imported fields are saved, and the downloaded file is removed
    if it is still present.

    :param params: mapping that provides ``datafeed_id``.
    """
    datafeed = Datafeed.get(id=params.get('datafeed_id'))
    download_file = datafeed.log['download_file']

    if os.path.exists(download_file):
        # Active products of the same retailer that belong to a different
        # datafeed, and the pictures attached to them.
        # NOTE(review): both queries are built before the import, but
        # .count() is only called after it -- presumably the ORM evaluates
        # lazily, so the counts reflect the post-import state. Confirm that
        # this is the intended meaning of "deleted".
        products_for_del = Product.filter(
            Product.retailer_id == datafeed.retailer_id,
            ~(Product.datafeed_id == datafeed.id),
            Product.active == True)  # noqa: E712 -- builds a query expression
        images_for_del = Picture.filter(
            Picture.product << products_for_del)

        importer = ImportDatafeed(self._app)
        try:
            upd, ins = importer.run(download_file,
                                    datafeed.retailer.id,
                                    datafeed_id=datafeed.id)
        except Exception as e:
            # Boundary handler: any import failure is recorded on the
            # datafeed instead of propagating to the caller.
            datafeed.status = Datafeed.STATUS_ERROR
            datafeed.log.update(
                {'import_error': 'Exception: {}'.format(e)})
        else:
            datafeed.status = Datafeed.STATUS_COMPLETED
            datafeed.log.update({
                'offers_new': ins,
                'offers_updated': upd,
                'offers_deleted': products_for_del.count(),
                'images_deleted': images_for_del.count(),
                'offers_active': Product.filter(
                    retailer_id=datafeed.retailer_id,
                    active=True).count(),
            })
    else:
        datafeed.status = Datafeed.STATUS_ERROR
        datafeed.log.update({'process_error': 'file {} does not exists'
                             .format(download_file)})

    datafeed.imported = datetime.datetime.now()
    datafeed.save(only=[Datafeed.status, Datafeed.log, Datafeed.imported])

    # Cleanup must not fail when the file is already gone: that is exactly
    # the situation on the "file does not exist" branch above, where an
    # unconditional unlink would raise right after the error was recorded.
    try:
        os.unlink(download_file)
    except FileNotFoundError:
        pass