def process_item(self, item, spider):
    """Report every empty field that the spider was expected to fill.

    A field counts as missing when its value is falsy and
    ``self.defines_xpath(spider, key)`` is true. A missing ``hash_id`` is
    reported with ``STATUS_CRITICAL`` and the item is dropped; any other
    missing field is reported with ``STATUS_WARNING`` and the item is kept.

    Args:
        item: Scraped item; must expose ``fields`` and ``get``.
        spider: Spider that produced the item; its ``name`` is reported.

    Returns:
        The same ``item`` object when it is not dropped.

    Raises:
        DropItem: If ``hash_id`` is missing.
    """
    # The URL does not depend on the field, so look it up once.
    url = item.get('url')

    for key in item.fields:
        # Populated fields, and fields the spider has no xpath for, are fine.
        if item.get(key) or not self.defines_xpath(spider, key):
            continue

        if key == 'hash_id':
            raise_missing_value(
                spider_name=spider.name,
                field_name=key,
                url=url,
                level=STATUS_CRITICAL,
                message="Missing Item hash ID.",
                category=DATABASE_ERROR,
            )
            # Carry the reason so the drop is explained wherever it is logged.
            raise DropItem("Missing Item hash ID.")

        raise_missing_value(
            spider_name=spider.name,
            field_name=key,
            url=url,
            level=STATUS_WARNING,
            message="Missing field.",
            category=DATABASE_ERROR,
        )

    return item
def load_price_currency_from_str(self, text):
    """Parse ``text`` for a price and add it under ``F_PRICE``.

    If parsing or adding the value raises, ``MISSING_VALUE`` is added under
    ``F_PRICE`` instead and the failure is passed to ``raise_missing_value``
    as a critical parsing error.

    Args:
        text: String handed to ``parse_price_currency`` together with
            ``self.context``.
    """
    try:
        # NOTE(review): the parsed currency is not stored here, only the
        # price -- confirm this is intended.
        price, _currency = parse_price_currency(text, self.context)
        self.add_value(F_PRICE, price)
    except Exception as exc:
        self.add_value(F_PRICE, MISSING_VALUE)
        report = dict(
            spider_name=self.context['spider_name'],
            field_name=F_PRICE,
            url=self.context[F_URL],
            exception=exc,
            level=STATUS_CRITICAL,
            message="Could not load price and currency.",
            category=PARSING_ERROR,
        )
        raise_missing_value(**report)
def load_price_currency_from_str(self, text):
    """Add the price parsed from ``text`` under ``F_PRICE``.

    Any exception raised while parsing or adding the price is caught:
    ``MISSING_VALUE`` is added under ``F_PRICE`` and the exception is
    handed to ``raise_missing_value`` with ``STATUS_CRITICAL`` and
    ``PARSING_ERROR``.

    Args:
        text: String passed to ``parse_price_currency`` along with
            ``self.context``.
    """
    try:
        # NOTE(review): only the price is kept; the currency returned by
        # the parser is unused -- confirm this is intended.
        price, _unused_currency = parse_price_currency(text, self.context)
        self.add_value(F_PRICE, price)
    except Exception as error:
        self.add_value(F_PRICE, MISSING_VALUE)
        failure = dict(
            spider_name=self.context['spider_name'],
            field_name=F_PRICE,
            url=self.context[F_URL],
            exception=error,
            level=STATUS_CRITICAL,
            message="Could not load price and currency.",
            category=PARSING_ERROR,
        )
        raise_missing_value(**failure)
def load_price_currency(self, xpath, spider_name=''):
    """Extract price and currency at ``xpath`` and add both values.

    The strings selected by ``xpath`` are joined with spaces and parsed by
    ``parse_price_currency``; the results are added under ``F_PRICE`` and
    ``F_CURRENCY``.

    If any of that raises, ``MISSING_VALUE`` is added under ``F_PRICE``,
    the currency falls back to ``site.country.currency`` of the site found
    by ``spider_name``, and the failure is passed to
    ``raise_missing_value`` as a critical parsing error.

    Args:
        xpath: XPath expression evaluated against ``self.selector``.
        spider_name: Used only to look up the site for the fallback
            currency.

    Raises:
        Exception: Whatever the fallback site lookup raises (e.g. no
            matching site) still propagates, as before, but only after the
            parse failure has been reported.
    """
    try:
        text = ' '.join(self.selector.xpath(xpath).extract())
        (price, currency) = parse_price_currency(text, self.context)
        self.add_value(F_PRICE, price)
        self.add_value(F_CURRENCY, currency)
    except Exception as e:
        self.add_value(F_PRICE, MISSING_VALUE)
        try:
            site = SITE_MODEL.objects.get(spider_name=spider_name)
            self.add_value(F_CURRENCY, site.country.currency)
        finally:
            # Report from ``finally`` so a failed site lookup (likely with
            # the default empty ``spider_name``) cannot hide the parse error.
            raise_missing_value(
                spider_name=self.context['spider_name'],
                field_name=F_PRICE,
                url=self.context[F_URL],
                exception=e,
                level=STATUS_CRITICAL,
                message="Could not load price and currency.",
                category=PARSING_ERROR,
            )
def load_price_currency(self, xpath, spider_name=''):
    """Add the price and currency found at ``xpath``.

    The strings selected by ``xpath`` are space-joined and handed to
    ``parse_price_currency``; its results are added under ``F_PRICE`` and
    ``F_CURRENCY``.

    On any exception, ``MISSING_VALUE`` is added under ``F_PRICE``, the
    currency is taken from ``site.country.currency`` of the site looked up
    by ``spider_name``, and the exception is passed to
    ``raise_missing_value`` with ``STATUS_CRITICAL``.

    Args:
        xpath: XPath expression evaluated against ``self.selector``.
        spider_name: Name used to find the site that supplies the fallback
            currency.

    Raises:
        Exception: An error from the fallback site lookup still
            propagates, but the original parse failure is reported first.
    """
    try:
        text = ' '.join(self.selector.xpath(xpath).extract())
        (price, currency) = parse_price_currency(text, self.context)
        self.add_value(F_PRICE, price)
        self.add_value(F_CURRENCY, currency)
    except Exception as e:
        self.add_value(F_PRICE, MISSING_VALUE)
        try:
            site = SITE_MODEL.objects.get(spider_name=spider_name)
            self.add_value(F_CURRENCY, site.country.currency)
        finally:
            # Always report the parse failure, even when the fallback
            # currency lookup above raises.
            raise_missing_value(
                spider_name=self.context['spider_name'],
                field_name=F_PRICE,
                url=self.context[F_URL],
                exception=e,
                level=STATUS_CRITICAL,
                message="Could not load price and currency.",
                category=PARSING_ERROR,
            )
def process_item(self, item, spider):
    """Check ``item`` for empty fields the spider should have scraped.

    For each key in ``item.fields`` whose value is falsy and for which
    ``self.defines_xpath(spider, key)`` is true, a missing value is
    reported. ``hash_id`` is reported as ``STATUS_CRITICAL`` and causes the
    item to be dropped; every other field is reported as ``STATUS_WARNING``.

    Args:
        item: Scraped item; must expose ``fields`` and ``get``.
        spider: Spider that produced the item; its ``name`` is reported.

    Returns:
        The same ``item`` object when it is not dropped.

    Raises:
        DropItem: If ``hash_id`` is missing.
    """
    # Loop-invariant: the item URL is the same for every field.
    url = item.get('url')

    for key in item.fields:
        is_missing = not item.get(key) and self.defines_xpath(spider, key)
        if not is_missing:
            continue

        if key == 'hash_id':
            raise_missing_value(
                spider_name=spider.name,
                field_name=key,
                url=url,
                level=STATUS_CRITICAL,
                message="Missing Item hash ID.",
                category=DATABASE_ERROR,
            )
            # Give the exception a reason so the drop is self-explanatory.
            raise DropItem("Missing Item hash ID.")

        raise_missing_value(
            spider_name=spider.name,
            field_name=key,
            url=url,
            level=STATUS_WARNING,
            message="Missing field.",
            category=DATABASE_ERROR,
        )

    return item
def store_item(item, spider):
    """Store an item in the database.

    The item is split by ``extract_item_data`` into deal data and merchant
    data. The merchant is looked up with ``find_existing_merchant`` and
    created when none is found. The deal is then stored as a ``TestItem``
    when ``spider.site_id == -1``, otherwise as an ``ITEM_MODEL`` row, with
    a ``SoldCount`` entry when the deal carries ``sold_count``.

    Any exception raised while storing is caught and handed to
    ``raise_missing_value`` as a critical database error.

    Args:
        item: Scraped item (mapping-like). ``FIELD_FINISHED`` is removed
            from it in place.
        spider: Spider that produced the item; ``name`` and ``site_id``
            are read.
    """
    # This field won't go into the database, so we can get rid of it
    if FIELD_FINISHED in item:
        del item[FIELD_FINISHED]

    try:
        deal, merchant = extract_item_data(dict(item))

        # Find the merchant, creating it if it does not exist yet
        merchant_obj = find_existing_merchant(merchant)
        if not merchant_obj:
            merchant_obj = MERCHANT_MODEL.objects.create(**merchant)
        deal['merchant'] = merchant_obj

        if hasattr(spider, 'site_id') and spider.site_id == -1:
            # If a site_id was not provided, we assume the user is testing
            # the spider, so we just create a TestItem
            TestItem.objects.create(**deal)
        else:
            # Otherwise, store the item model
            deal['site_id'] = spider.site_id
            stored_deal = ITEM_MODEL.objects.create(**deal)

            # Sold counts are logged only for stored (non-test) deals.
            if 'sold_count' in deal:
                SoldCount.objects.create(
                    deal=stored_deal,
                    date=deal['date_time'],
                    value=deal['sold_count'],
                )
    except Exception as e:
        raise_missing_value(
            spider_name=spider.name,
            # .get() so an item without a URL cannot raise KeyError here
            # and hide the storage error being reported.
            url=item.get('url'),
            exception=e,
            level=STATUS_CRITICAL,
            message="Failed to store item.",
            category=DATABASE_ERROR,
        )
def store_item(item, spider):
    """Store an item in the database.

    ``extract_item_data`` splits the item into deal data and merchant data.
    The merchant is reused when ``find_existing_merchant`` returns one and
    created otherwise. When ``spider.site_id == -1`` the deal is stored as
    a ``TestItem``; otherwise it is stored through ``ITEM_MODEL`` and, if it
    has ``sold_count``, a ``SoldCount`` entry is created for it.

    Exceptions raised while storing are caught and passed to
    ``raise_missing_value`` with ``STATUS_CRITICAL`` and ``DATABASE_ERROR``.

    Args:
        item: Scraped item (mapping-like). ``FIELD_FINISHED`` is deleted
            from it in place.
        spider: Spider that produced the item; ``name`` and ``site_id``
            are read.
    """
    # This field won't go into the database, so we can get rid of it
    if FIELD_FINISHED in item:
        del item[FIELD_FINISHED]

    try:
        deal, merchant = extract_item_data(dict(item))

        # Find the merchant, or create one when no match exists
        merchant_obj = find_existing_merchant(merchant)
        if not merchant_obj:
            merchant_obj = MERCHANT_MODEL.objects.create(**merchant)
        deal['merchant'] = merchant_obj

        if hasattr(spider, 'site_id') and spider.site_id == -1:
            # If a site_id was not provided, we assume the user is testing
            # the spider, so we just create a TestItem
            TestItem.objects.create(**deal)
        else:
            # Otherwise, store the item model
            deal['site_id'] = spider.site_id
            stored_deal = ITEM_MODEL.objects.create(**deal)

            # Only stored (non-test) deals get a sold-count log entry.
            if 'sold_count' in deal:
                SoldCount.objects.create(
                    deal=stored_deal,
                    date=deal['date_time'],
                    value=deal['sold_count'],
                )
    except Exception as e:
        raise_missing_value(
            spider_name=spider.name,
            # Use .get(): a missing 'url' must not raise inside the handler
            # and mask the original storage failure.
            url=item.get('url'),
            exception=e,
            level=STATUS_CRITICAL,
            message="Failed to store item.",
            category=DATABASE_ERROR,
        )