Exemplo n.º 1
0
    def __init__(self, search_sort='default', *args, **kwargs):
        """Build the spider with a URL formatter preset to the chosen sort.

        :param search_sort: key into ``self.SEARCH_SORT``; an unknown key
            raises ``KeyError``.

        All remaining positional and keyword arguments are forwarded
        unchanged to the parent constructor.
        """
        sort_value = self.SEARCH_SORT[search_sort]
        formatter = FormatterWithDefaults(search_sort=sort_value)
        super(OzonProductsSpider, self).__init__(
            url_formatter=formatter, *args, **kwargs)
Exemplo n.º 2
0
 def __init__(self, search_sort='default', *args, **kwargs):
     """Remember the requested ordering and initialise the parent spider.

     :param search_sort: key into ``self.SEARCH_SORT``; the mapped value
         is stored on ``self.order``. An unknown key raises ``KeyError``.

     The parent receives an empty ``FormatterWithDefaults`` and the first
     entry of ``self.allowed_domains`` as ``site_name``.
     """
     self.order = self.SEARCH_SORT[search_sort]
     formatter = FormatterWithDefaults()
     domain = self.allowed_domains[0]
     super(EBuyerProductSpider, self).__init__(
         url_formatter=formatter, site_name=domain, *args, **kwargs)
Exemplo n.º 3
0
    def __init__(self, order='default', *args, **kwargs):
        """Prepare optional-field handling and the sort mode, then call the
        parent constructor.

        :param order: key looked up in ``self.SORT_MODES``; the mapped value
            (or ``None`` when the key is absent) is stored on
            ``self.sort_mode`` and handed to ``FormatterWithDefaults``.

        Keyword arguments whose names match keys of
        ``self.OPTIONAL_REQUESTS`` switch the corresponding optional field
        on or off; all positional and keyword arguments are also forwarded
        to the parent constructor.
        """
        # When several domains are allowed, default the site name to the
        # first one.
        # NOTE(review): the guard tests for a 'site' key but the value is
        # stored under 'site_name' - confirm which key the parent expects.
        if getattr(self, 'allowed_domains', None):
            if len(self.allowed_domains) > 1 and 'site' not in kwargs:
                kwargs['site_name'] = self.allowed_domains[0]

        # Wrap each optional field's ``_parse_<key>`` and ``_request_<key>``
        # methods with ``option_parser`` / ``option_requester``.
        # NOTE(review): the wrapped method is bound to this instance but is
        # assigned on the class, so it is visible to every instance and a
        # second instantiation wraps the already-wrapped method - confirm
        # this is intended.
        for key in self.OPTIONAL_REQUESTS.keys():
            method_name = '_parse_%s' % key
            old_method = getattr(self, method_name)
            new_method = types.MethodType(option_parser(old_method), self)
            setattr(self.__class__, method_name, new_method)
            method_name = '_request_%s' % key
            old_method = getattr(self, method_name)
            new_method = types.MethodType(
                option_requester(key)(old_method), self)
            setattr(self.__class__, method_name, new_method)

        # Build the list of optional fields to scrape: a field is kept when
        # its OPTIONAL_REQUESTS value is truthy or a truthy keyword argument
        # of the same name was passed, unless that keyword argument was
        # passed with a falsy value.
        self.options = [
            k for k, v in self.OPTIONAL_REQUESTS.iteritems()
            if (v or kwargs.get(k, False)) and kwargs.get(k, True)
        ]

        # Resolve the sort mode; an unknown ``order`` is only logged (and
        # only when SORT_MODES is non-empty), leaving ``sort_mode`` as None.
        sort_mode = self.SORT_MODES.get(order, None)
        if self.SORT_MODES:
            if sort_mode is None:
                self.log('Sort mode "%s" is not defined' % order)
        self.sort_mode = sort_mode
        formatter = FormatterWithDefaults(sort_mode=sort_mode)

        super(ProductsSpider, self).__init__(formatter, *args, **kwargs)
Exemplo n.º 4
0
 def __init__(self, search_sort='POPULAR', *args, **kwargs):
     """Reset the paging offset and initialise the parent spider.

     :param search_sort: key into ``self._SORT_MODES``; an unknown key
         raises ``KeyError``.

     ``self.start_pos`` is set to 0 before the parent constructor runs.
     The first entry of ``self.allowed_domains`` is passed as
     ``site_name``.
     """
     self.start_pos = 0
     domain = self.allowed_domains[0]
     formatter = FormatterWithDefaults(
         search_sort=self._SORT_MODES[search_sort])
     super(NextCoUkProductSpider, self).__init__(
         site_name=domain, url_formatter=formatter, *args, **kwargs)
Exemplo n.º 5
0
 def __init__(self, sort_mode=None, *args, **kwargs):
     """Parse the sort request and initialise the parent spider.

     :param sort_mode: value handed to ``self._parse_sort``; the result is
         stored on ``self.SORT``. Its first two items become the
         formatter's ``sort_mode`` and ``dir`` defaults.

     ``self.pages`` starts out as an empty dict. The first entry of
     ``self.allowed_domains`` is passed as ``site_name``.
     """
     self.SORT = self._parse_sort(sort_mode)
     self.pages = {}
     sort_field = self.SORT[0]
     direction = self.SORT[1]
     formatter = FormatterWithDefaults(sort_mode=sort_field, dir=direction)
     super(PepperfryProductsSpider, self).__init__(
         url_formatter=formatter,
         site_name=self.allowed_domains[0],
         *args, **kwargs)
Exemplo n.º 6
0
    def __init__(self, search_sort='recommended', *args, **kwargs):
        """Attach the buyer-reviews helper and initialise the parent spider.

        :param search_sort: key into ``self._SORT_MODES``; an unknown key
            raises ``KeyError``.

        ``self.br`` is a ``BuyerReviewsBazaarApi`` created with this spider
        as ``called_class``. The first entry of ``self.allowed_domains`` is
        passed as ``site_name``.
        """
        self.br = BuyerReviewsBazaarApi(called_class=self)

        domain = self.allowed_domains[0]
        formatter = FormatterWithDefaults(sort=self._SORT_MODES[search_sort])
        super(HalfordsProductSpider, self).__init__(
            site_name=domain, url_formatter=formatter, *args, **kwargs)
Exemplo n.º 7
0
 def __init__(self, search_sort='best_match', *args, **kwargs):
     """Initialise the spider with the requested search ordering.

     :param search_sort: key into ``self.SEARCH_SORT``. A
         ``search_modes`` keyword argument, when present, takes precedence
         over this parameter (and is still forwarded to the parent).

     An unknown key raises ``KeyError``. ``site_name`` is fixed to
     ``"autozone.com"``.
     """
     search_sort = kwargs.get("search_modes", search_sort)
     formatter = FormatterWithDefaults(
         search_sort=self.SEARCH_SORT[search_sort])
     super(AutozoneProductsSpider, self).__init__(
         url_formatter=formatter,
         site_name="autozone.com",
         *args, **kwargs)
    def __init__(self, search_sort='NEWEST', *args, **kwargs):
        """Attach the buyer-reviews helper and initialise the parent spider.

        :param search_sort: key into ``self._SORT_MODES``; an unknown key
            raises ``KeyError``.

        ``self.br`` is a ``BuyerReviewsBazaarApi`` created with this spider
        as ``called_class``. The first entry of ``self.allowed_domains`` is
        passed as ``site_name``.
        """
        self.br = BuyerReviewsBazaarApi(called_class=self)

        domain = self.allowed_domains[0]
        formatter = FormatterWithDefaults(
            sort_mode=self._SORT_MODES[search_sort])
        super(HouseoffraserProductSpider, self).__init__(
            site_name=domain, url_formatter=formatter, *args, **kwargs)
Exemplo n.º 9
0
 def __init__(self, sort_mode="default", *args, **kwargs):
     """Initialise the spider with a formatter preset to the chosen sort.

     :param sort_mode: key into ``self.SORT_MODES``. An unknown key is
         logged and replaced by ``'default'``.

     The first entry of ``self.allowed_domains`` is passed as
     ``site_name``; remaining arguments are forwarded to the parent
     constructor unchanged.
     """
     if sort_mode not in self.SORT_MODES:
         # Interpolate the rejected value so the log names the offending
         # mode instead of printing a bare "%s" placeholder.
         self.log('"%s" not in SORT_MODES' % (sort_mode,))
         sort_mode = 'default'
     formatter = FormatterWithDefaults(sort_by=self.SORT_MODES[sort_mode])
     super(SouqProductsSpider,
           self).__init__(formatter,
                          site_name=self.allowed_domains[0],
                          *args,
                          **kwargs)
Exemplo n.º 10
0
 def __init__(self, order='default', *args, **kwargs):
     """Initialise the spider with the requested result ordering.

     :param order: key into ``self.SORT_MODES``. An unknown key is logged
         at ``WARNING`` level and replaced by ``'default'``.

     Remaining arguments are forwarded to the parent constructor unchanged.
     """
     if order not in self.SORT_MODES:
         message = (
             "'%s' not in SORT_MODES. Used default for this session" % order)
         self.log(message, WARNING)
         order = 'default'
     formatter = FormatterWithDefaults(search_sort=self.SORT_MODES[order])
     super(MaplinProductsSpider, self).__init__(
         url_formatter=formatter, *args, **kwargs)
Exemplo n.º 11
0
 def __init__(self, search_sort='best_match', zip_code='M3C',
              search_order='default', *args, **kwargs):
     """Initialise the spider with sort, order and optional zip code.

     :param search_sort: key into ``self._SEARCH_SORT``.
     :param zip_code: stored on ``self.zip_code`` when truthy; otherwise
         the attribute is left untouched.
     :param search_order: key into ``self._SEARCH_ORDER``.

     An unknown sort or order key raises ``KeyError``. The first entry of
     ``self.allowed_domains`` is passed as ``site_name``.
     """
     if zip_code:
         self.zip_code = zip_code
     domain = self.allowed_domains[0]
     formatter = FormatterWithDefaults(
         search_sort=self._SEARCH_SORT[search_sort],
         search_order=self._SEARCH_ORDER[search_order])
     super(WalmartCaProductsSpider, self).__init__(
         site_name=domain, url_formatter=formatter, *args, **kwargs)
Exemplo n.º 12
0
 def __init__(self, search_sort='default', *args, **kwargs):
     """Initialise the spider with the chosen sort and a zero start offset.

     :param search_sort: key into ``self.SEARCH_SORT``; an unknown key
         raises ``KeyError``.

     ``site_name`` is set explicitly to the first allowed domain because
     the spider has several allowed domains.
     """
     formatter = FormatterWithDefaults(
         search_sort=self.SEARCH_SORT[search_sort],
         start=0)
     domain = self.allowed_domains[0]
     super(PGShopProductSpider, self).__init__(
         url_formatter=formatter, site_name=domain, *args, **kwargs)
Exemplo n.º 13
0
 def __init__(self, search_sort='default', *args, **kwargs):
     """Set up per-run state and initialise the parent spider.

     :param search_sort: key into ``self.SEARCH_SORT``; the mapped value
         is stored on ``self.search_sort``. An unknown key raises
         ``KeyError``.
     """
     chosen_sort = self.SEARCH_SORT[search_sort]
     self.search_sort = chosen_sort
     self.new_stile = False
     # Unique links collected so far.
     self.links = []
     # Responses kept from the new_stile site version so that no
     # additional requests have to be made for them.
     self.initial_responses = []
     formatter = FormatterWithDefaults(search_sort=chosen_sort)
     super(CoachSpider, self).__init__(
         url_formatter=formatter, *args, **kwargs)
Exemplo n.º 14
0
    def __init__(self, sort_mode=None, *args, **kwargs):
        """Initialise the spider, optionally overriding the sort mode.

        :param sort_mode: case-insensitive key into ``self.SORT_MODES``.
            A known key updates ``self.SORTING``; an unknown one is only
            logged. When falsy, ``self.SORTING`` is left as it is.

        The formatter uses ``self.SORTING`` when truthy, otherwise
        ``self.SORT_MODES['default']``. The first entry of
        ``self.allowed_domains`` is passed as ``site_name``.
        """
        if sort_mode:
            mode_key = sort_mode.lower()
            if mode_key not in self.SORT_MODES:
                # Interpolate the rejected value so the log names the
                # offending mode instead of a bare "%s" placeholder.
                self.log('"%s" not in SORT_MODES' % (sort_mode,))
            else:
                self.SORTING = self.SORT_MODES[mode_key]

        super(RiverislandProductsSpider, self).__init__(
            url_formatter=FormatterWithDefaults(
                sort_mode=self.SORTING or self.SORT_MODES['default']),
            site_name=self.allowed_domains[0],
            *args,
            **kwargs)
Exemplo n.º 15
0
    def __init__(self, sort_mode=None, *args, **kwargs):
        """Initialise the spider, optionally overriding the sort mode.

        :param sort_mode: case-insensitive key into ``self.SORT_MODES``.
            A known key updates ``self.SORTING``; an unknown one is only
            logged. When falsy, ``self.SORTING`` is left as it is.

        The formatter receives ``self.page`` and uses ``self.SORTING`` when
        truthy, otherwise ``self.SORT_MODES['relevance']``. The first entry
        of ``self.allowed_domains`` is passed as ``site_name``.
        """
        if sort_mode:
            mode_key = sort_mode.lower()
            if mode_key not in self.SORT_MODES:
                # Interpolate the rejected value so the log names the
                # offending mode instead of a bare "%s" placeholder.
                self.log('"%s" not in SORT_MODES' % (sort_mode,))
            else:
                self.SORTING = self.SORT_MODES[mode_key]

        super(WalGreensProductsSpider, self).__init__(
            site_name=self.allowed_domains[0],
            url_formatter=FormatterWithDefaults(
                page=self.page,
                sort_mode=self.SORTING or self.SORT_MODES['relevance'],),
            *args,
            **kwargs)
Exemplo n.º 16
0
 def __init__(self,
              sort_mode=None,
              fetch_related_products=True,
              store_id=10151,
              catalog_id=24551,
              lang_id=110,
              *args,
              **kwargs):
     """Initialise the spider, switching to the sorted search URL if asked.

     :param sort_mode: key into ``self.SORT_MODES``. A known key selects
         ``self.SORT_SEARCH_URL`` as ``self.SEARCH_URL`` and is stored
         (mapped) on ``self.SORTING``; anything else is logged and
         ``self.SORTING`` falls back to ``self.SORT_MODES['relevance']``.
     :param fetch_related_products: stored on
         ``self.fetch_related_products``.
     :param store_id: formatter default, used only with a known sort mode.
     :param catalog_id: formatter default, used only with a known sort mode.
     :param lang_id: formatter default, used only with a known sort mode.

     ``site_name`` is set to ``'argos.co.uk'`` through ``cond_set_value``.
     """
     self.fetch_related_products = fetch_related_products
     if sort_mode in self.SORT_MODES:
         sort_mode = self.SORT_MODES[sort_mode]
         self.SEARCH_URL = self.SORT_SEARCH_URL
         self.SORTING = sort_mode
         formatter = FormatterWithDefaults(sort_mode=sort_mode,
                                           store_id=store_id,
                                           catalog_id=catalog_id,
                                           lang_id=lang_id)
     else:
         # Interpolate the rejected value so the log names the offending
         # mode instead of a bare "%s" placeholder.
         self.log('"%s" not in SORT_MODES' % (sort_mode,))
         self.SORTING = self.SORT_MODES['relevance']
         formatter = FormatterWithDefaults()
     cond_set_value(kwargs, 'site_name', 'argos.co.uk')
     super(ArgosUKProductsSpider, self).__init__(formatter, *args, **kwargs)
Exemplo n.º 17
0
    def __init__(self, sort_mode=None, *args, **kwargs):
        """Initialise the spider, enabling sorted search for a known mode.

        :param sort_mode: key into ``self.SORT_MODES``. For a known key the
            entry's ``'CM'`` and ``'T1'`` values become formatter defaults
            and ``self.SEARCH_URL`` is replaced by ``self.SEARCH_URL2``.
            Otherwise the parent receives ``None`` as its formatter.

        The first entry of ``self.allowed_domains`` is passed as
        ``site_name``.
        """
        formatter = None
        if sort_mode and sort_mode in self.SORT_MODES:
            mode = self.SORT_MODES[sort_mode]
            formatter = FormatterWithDefaults(CM=mode['CM'], T1=mode['T1'])
            self.SEARCH_URL = self.SEARCH_URL2

        domain = self.allowed_domains[0]
        super(BootsProductsSpider, self).__init__(
            formatter, site_name=domain, *args, **kwargs)
Exemplo n.º 18
0
    def __init__(self, sort_mode=None, *args, **kwargs):
        """Initialise the spider, optionally overriding the sort mode.

        :param sort_mode: case-insensitive key into ``self.SORT_MODES``.
            A known key updates ``self.SORTING``; an unknown one is only
            logged. When falsy, ``self.SORTING`` is left as it is.

        The formatter uses ``self.SORTING`` when truthy, otherwise
        ``self.SORT_MODES['default']``. After the parent constructor runs,
        ``CONCURRENT_REQUESTS`` is overridden to 1 and a fixed desktop
        Chrome user agent is set on the spider.
        """
        self.buyer_reviews = BuyerReviewsBazaarApi(called_class=self)
        if sort_mode:
            mode_key = sort_mode.lower()
            if mode_key not in self.SORT_MODES:
                # Interpolate the rejected value so the log names the
                # offending mode instead of a bare "%s" placeholder.
                self.log('"%s" not in SORT_MODES' % (sort_mode,))
            else:
                self.SORTING = self.SORT_MODES[mode_key]

        super(JcpenneyProductsSpider,
              self).__init__(url_formatter=FormatterWithDefaults(
                  sort_mode=self.SORTING or self.SORT_MODES['default']),
                             site_name=self.allowed_domains[0],
                             *args,
                             **kwargs)
        settings.overrides['CONCURRENT_REQUESTS'] = 1
        # Adjacent literals are concatenated at compile time; the resulting
        # string is the same single user-agent value.
        self.user_agent = (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/54.0.2840.98 Safari/537.36')
Exemplo n.º 19
0
    def __init__(self, sort_mode=None, *args, **kwargs):
        """Initialise the spider, optionally overriding the sort mode.

        :param sort_mode: case-insensitive key into ``self.SORT_MODES``.
            A known key updates ``self.SORTING``; an unknown one is only
            logged. When falsy, ``self.SORTING`` is left as it is.

        The formatter uses ``self.SORTING`` when truthy, otherwise
        ``self.SORT_MODES['default']``.
        """
        if sort_mode:
            mode_key = sort_mode.lower()
            if mode_key not in self.SORT_MODES:
                # Interpolate the rejected value so the log names the
                # offending mode instead of a bare "%s" placeholder.
                self.log('"%s" not in SORT_MODES' % (sort_mode,))
            else:
                self.SORTING = self.SORT_MODES[mode_key]

        # do not hit 404 multiple times
        #settings.overrides['RETRY_HTTP_CODES'] \
        #    = [c for c in settings['RETRY_HTTP_CODES'] if c != 404]

        super(UltaProductSpider, self).__init__(
            url_formatter=FormatterWithDefaults(
                sort_mode=self.SORTING or self.SORT_MODES['default']),
            *args,
            **kwargs)
Exemplo n.º 20
0
 def __init__(self, order="default", *args, **kwargs):
     """Initialise the spider with a mandatory, known sort order.

     :param order: key looked up in ``self.SORT_MODES``.
     :raises Exception: when the lookup yields ``None`` (key missing or
         mapped to ``None``).
     """
     chosen_mode = self.SORT_MODES.get(order)
     if chosen_mode is None:
         raise Exception('%s sorting mode is not defined' % order)
     super(ProswimwearCoUkSpider, self).__init__(
         FormatterWithDefaults(sort_mode=chosen_mode), *args, **kwargs)
Exemplo n.º 21
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider, reading the sort order from ``kwargs``.

     The ``order`` keyword argument (default ``'relevance'``) is looked up
     in ``self.SORT_BY``; when the key is absent the literal string
     ``'relevance'`` is used. The result is stored on ``self.sort_by`` and
     given to the formatter as ``sort``. ``order`` stays in ``kwargs`` and
     is forwarded to the parent constructor with everything else.
     """
     requested = kwargs.get('order', 'relevance')
     self.sort_by = self.SORT_BY.get(requested, 'relevance')
     super(AhProductsSpider, self).__init__(
         FormatterWithDefaults(sort=self.sort_by), *args, **kwargs)
Exemplo n.º 22
0
 def __init__(self, search_sort='best_match', *args, **kwargs):
     """Initialise the spider with the requested search ordering.

     :param search_sort: key into ``self.SEARCH_SORT``; an unknown key
         raises ``KeyError``.

     Remaining arguments are forwarded to the parent constructor unchanged.
     """
     sort_value = self.SEARCH_SORT[search_sort]
     formatter = FormatterWithDefaults(search_sort=sort_value)
     super(FlipkartProductsSpider, self).__init__(
         url_formatter=formatter, *args, **kwargs)
Exemplo n.º 23
0
 def __init__(self, search_sort='best_sellers', *args, **kwargs):
     """Initialise the spider with the requested search ordering.

     :param search_sort: key into ``self.SEARCH_SORT``; an unknown key
         raises ``KeyError``.

     Remaining arguments are forwarded to the parent constructor unchanged.
     """
     sort_value = self.SEARCH_SORT[search_sort]
     formatter = FormatterWithDefaults(search_sort=sort_value)
     super(MorrisonsProductsSpider, self).__init__(
         url_formatter=formatter, *args, **kwargs)
Exemplo n.º 24
0
 def __init__(self, *args, **kwargs):
     """Initialise the spider with fixed paging defaults.

     The formatter starts at ``pagenum=1`` with ``prods_per_page=32``; all
     arguments are forwarded to the parent constructor unchanged.
     """
     paging_defaults = FormatterWithDefaults(pagenum=1, prods_per_page=32)
     super(AsdaProductsSpider, self).__init__(
         url_formatter=paging_defaults, *args, **kwargs)
 def __init__(self, order='relevance', *args, **kwargs):
     """Initialise the spider with the requested ordering, ascending.

     :param order: key looked up in ``self.SEARCH_ORDER``; when the key is
         absent the literal string ``'relevance'`` is used instead.

     The formatter always gets ``sort='asc'``.
     """
     resolved_order = self.SEARCH_ORDER.get(order, 'relevance')
     super(BabymonitorsdirectProductsSpider, self).__init__(
         FormatterWithDefaults(order=resolved_order, sort='asc'),
         *args, **kwargs)