def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # product / variant info JSON and their "checked" flags
        self.product_info_json = self.variant_info_jsons = None
        self.is_product_info_json_checked = False
        self.is_variant_info_jsons_checked = False
        self.price_json = None

        # review-related state
        self.review_json = self.reviews_tree = self.reviews = None
        self.average_review = self.max_score = self.min_score = None
        self.review_count = 0
        self.is_review_checked = False

        # features and descriptive text
        self.features = self.feature_count = None
        self.description = self.long_description = None
        self.bullet_list = None

        # media
        # NOTE(review): -1 looks like a "not computed yet" sentinel, unlike the
        # None used for every other field here -- confirm against the readers.
        self.video_urls = -1
        self.video_count = None
        self.pdf_urls = self.pdf_count = None
    def __init__(self, **kwargs):
        """Register ``apikey``-tagged class members, then run ``Scraper.__init__``.

        Every attribute reachable on the class that itself carries an
        ``apikey`` attribute is stored in ``self.DATA_TYPES`` under that key.
        The base constructor is called afterwards with ``kwargs`` unchanged.
        """
        cls = self.__class__
        # Walk the class namespace rather than ``dir(self)``: a name that only
        # exists on the instance would make ``getattr(cls, name)`` raise
        # AttributeError.  The ``None`` default keeps the lookup safe.
        for name in dir(cls):
            member = getattr(cls, name, None)
            if hasattr(member, 'apikey'):
                self.DATA_TYPES[member.apikey] = member

        Scraper.__init__(self, **kwargs)
Пример #3
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # variants helper object
        self.pv = PepperfryVariants()

        # product JSON and its "checked" flag
        self.is_product_json_checked = False
        self.product_json = None
Пример #4
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        self.tv = TargetVariants()
        self.version = None

        # JSON payloads
        self.product_json = self.image_json = None

        # item info and its "checked" flag
        self.item_info = self.parent_item_info = None
        self.item_info_checked = False

        # categories and their "checked" flag
        self.categories = []
        self.categories_checked = False

        self.no_longer_available = 0

        # webcollage-related state
        self.wc_360 = self.wc_emc = self.wc_video = 0
        self.wc_pdf = self.wc_prodtour = 0
        self.is_webcollage_contents_checked = False

        # videos
        self.video_urls = []
        self.is_video_checked = False
Пример #5
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # whether product has any webcollage media
        self.wc_360 = self.wc_emc = self.wc_video = 0
        self.wc_pdf = self.wc_prodtour = 0

        # each value below is paired with its own "checked" flag
        self.videos, self.videos_checked = None, False
        self.variants, self.variants_checked = None, False
        self.pricearea_html, self.pricearea_html_checked = None, False
        self.product_xml, self.product_xml_checked = None, False
        self.product_details, self.product_details_checked = None, False

        self.product_json = None
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set the review state.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # review data and its "checked" flag
        self.is_review_checked = False
        self.review_json = self.review_list = None
Пример #7
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values."""
        Scraper.__init__(self, **kwargs)

        # JSON holders
        self.reviews_json = self.prod_jsons = None

        # matching "checked" flags
        self.reviews_checked = self.prod_jsons_checked = False
Пример #8
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set the review state.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        self.is_review_checked = False
        self.review_count = 0
        self.review_list = self.average_review = None
        self.max_review = self.min_review = None
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values."""
        Scraper.__init__(self, **kwargs)

        # images and their "checked" flag
        self.images, self.images_checked = None, False

        # reviews and their "checked" flag
        self.review_values = self.reviews_tree = None
        self.reviews_checked = False
Пример #10
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # holders: product features list, images, videos
        self.features = self.images = self.videos = None

        # matching "extracted" flags
        self.extracted_features = False
        self.extracted_images = False
        self.extracted_videos = False
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # variants helper object
        self.fv = FlipkartVariants()

        # review state; the numeric fields start at 0
        self.reviews = None
        self.is_review_checked = False
        self.average_review = self.max_review = self.min_review = 0
        self.review_count = 0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set the webcollage state.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # extracted contents and their flag
        self.webcollage_contents = None
        self.extracted_webcollage_contents = False
        self.webcollage_videos = []

        # per-media-type flags
        self.has_webcollage_360_view = self.has_webcollage_emc_view = False
        self.has_webcollage_video_view = self.has_webcollage_pdf = False
        self.has_webcollage_product_tour_view = False
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # variants helper object
        self.hv = HouseOffRaserVariants()

        # product JSON payloads
        self.product_json = self.variation_json = None

        # review data and its "checked" flag
        self.review_json = self.review_list = None
        self.is_review_checked = False
    def _extract_page_tree(self):
        """Build the page tree, reloading the page if the base class reported a 404.

        Runs ``Scraper._extract_page_tree`` first.  If that left
        ``ERROR_RESPONSE["failure_type"]`` set to the 404 marker, the marker is
        cleared, the page is fetched again through
        ``load_page_from_url_with_number_of_retries`` and both
        ``page_raw_text`` and ``tree_html`` are replaced with the new content.
        """
        Scraper._extract_page_tree(self)

        not_found = "HTTP 404 - Page Not Found"
        if self.ERROR_RESPONSE["failure_type"] != not_found:
            return

        # clear the 404 marker before retrying the download
        self.ERROR_RESPONSE["failure_type"] = None

        page_source = self.load_page_from_url_with_number_of_retries(
            self.product_page_url)
        self.page_raw_text = page_source
        self.tree_html = html.fromstring(page_source)
Пример #15
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # variants helper object
        self.uv = UniqloVariants()

        self.failure_type = None

        # JSON payloads
        self.product_json = self.price_json = None

        # review data and its "checked" flag
        self.review_json = self.review_list = None
        self.is_review_checked = False

        # image list and its "crawled" flag
        self.image_list = None
        self.is_image_crawled = False
Пример #16
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        self.reviews = self.prod_help = None

        # images
        self.image_urls, self.is_image_checked = None, False

        # webcollage-related content
        self.wc_content = self.sp_content = None
        self.is_webcollage_checked = False

        # videos
        self.video_urls, self.is_video_checked = None, False

        # widgets
        self.widget_pdfs = self.widget_videos = None
        self.widgets_checked = False
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # whether product has any webcollage media
        self.wc_360 = self.wc_emc = self.wc_video = 0
        self.wc_pdf = self.wc_prodtour = 0

        # content holders, all unset at construction time
        self.features = self.ingredients = None
        self.images = self.videos = None
        self.reviews = None
Пример #18
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # variants helper object
        self.wg = WagVariants()

        # JSON payloads and reviews
        self.product_json = self.image_json = None
        self.reviews = None

        # webcollage-related state
        self.wc_360 = self.wc_emc = self.wc_video = 0
        self.wc_pdf = self.wc_prodtour = 0
        self.is_webcollage_contents_checked = False

        # videos
        self.video_urls = []
        self.is_video_checked = False
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # variants helper object
        self.wcv = WalmartCAVariants()

        self.failure_type = None

        # JSON payloads
        self.product_info_json = self.product_json = None
        self.variant_json = None

        # review data and its "checked" flag
        self.review_json = self.review_list = None
        self.is_review_checked = False

        # code lists (string codes; their meaning is not visible in this method)
        self.list_out_of_stock = ['70', '80', '85', '87', '90']
        self.list_not_sold_online = ['85', '87', '90']
Пример #20
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs``, then set state and proxy config.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        The proxy settings are built from the ``CRAWLERA_*`` attributes.
        """
        Scraper.__init__(self, **kwargs)

        # JSON payloads
        self.product_json = self.buy_stack_json = None

        # review data and its "checked" flag
        self.review_json = self.rev_list = None
        self.is_review_checked = False

        # variants helper object
        self.lv = LeviVariants()

        # proxy settings
        host, port = self.CRAWLERA_HOST, self.CRAWLERA_PORT
        self.proxy_host = host
        self.proxy_port = port
        self.proxy_auth = HTTPProxyAuth(self.CRAWLERA_APIKEY, "")
        self.proxies = {"http": "http://{}:{}/".format(host, port)}
        self.proxy_config = {
            "proxy_auth": self.proxy_auth,
            "proxies": self.proxies,
        }
Пример #21
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # product data holders
        self.product_json = self.product = None
        self.breadcrumb_list = self.full_description = None
        self.images = None

        # reviews and their "checked" flag
        self.reviews = None
        self.is_review_checked = False

        # webcollage-related state
        self.webcollage_content = None
        self.is_webcollage_checked = False
        self.wc_360 = self.wc_emc = self.wc_video = 0
        self.wc_pdf = self.wc_prodtour = 0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        self.price_json = None

        # review data and its "checked" flag
        self.review_json = self.review_list = None
        self.is_review_checked = False

        # media contents
        self.is_analyze_media_contents = False
        self.video_urls = self.pdf_urls = None
        self.video_count = self.pdf_count = 0

        # webcollage-related counters
        self.wc_360 = self.wc_emc = self.wc_video = 0
        self.wc_pdf = self.wc_prodtour = 0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # whether product has any webcollage media
        self.wc_360 = self.wc_emc = self.wc_video = 0
        self.wc_pdf = self.wc_prodtour = 0

        # content holders, all unset at construction time
        self.features = self.ingredients = None
        self.images = self.videos = None
        self.json_data = None
        self.review_json = self.review_list = None

        # review endpoint template; ``{0}`` is a ``str.format`` placeholder
        # (it sits in the ``id`` filter of the query string)
        self.REVIEW_URL = (
            "http://api.bazaarvoice.com/data/batch.json"
            "?passkey=e8bg3vobqj42squnih3a60fui"
            "&apiversion=5.5"
            "&displaycode=6543-en_us"
            "&resource.q0=products"
            "&filter.q0=id%3Aeq%3A{0}"
            "&stats.q0=questions%2Creviews"
            "&filteredstats.q0=questions%2Creviews"
        )

        # variants helper object
        self.av = VerizonWirelessVariants()
Пример #24
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs``, then set state and proxy config.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        The proxy settings are built from the ``CRAWLERA_*`` attributes.
        """
        Scraper.__init__(self, **kwargs)

        # webcollage-related state
        self.webcollage_content, self.webcollage_checked = None, False
        self.wc_video = 0

        # reviews and their "checked" flag
        self.reviews = self.reviews_html = None
        self.is_review_checked = False

        # proxy settings
        host, port = self.CRAWLERA_HOST, self.CRAWLERA_PORT
        self.proxy_host = host
        self.proxy_port = port
        self.proxy_auth = HTTPProxyAuth(self.CRAWLERA_APIKEY, "")
        self.proxies = {"http": "http://{}:{}/".format(host, port)}
        self.proxy_config = {"proxy_auth": self.proxy_auth,
                             "proxies": self.proxies}
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs``, then set state and proxy config.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        The proxy settings are built from the ``CRAWLERA_*`` attributes.
        """
        Scraper.__init__(self, **kwargs)

        self.product_json = None

        # review data and its "checked" flag
        self.review_json = self.review_list = None
        self.is_review_checked = False

        # NOTE(review): a hayneedle URL next to NikeVariants looks like a
        # copy-paste leftover -- confirm before relying on store_url.
        self.store_url = 'http://www.hayneedle.com'

        # variants helper object, variants holder and its "checked" flag
        self.nv = NikeVariants()
        self.variants, self.is_variant_checked = None, False

        # proxy settings
        host, port = self.CRAWLERA_HOST, self.CRAWLERA_PORT
        self.proxy_host = host
        self.proxy_port = port
        self.proxy_auth = HTTPProxyAuth(self.CRAWLERA_APIKEY, "")
        self.proxies = {"http": "http://{}:{}/".format(host, port)}
        self.proxy_config = {"proxy_auth": self.proxy_auth,
                             "proxies": self.proxies}
    def _no_image(self, url):
        """Overwrites the _no_image
        in the base class with an additional test.
        Then calls the base class no_image.

        Returns True if image in url is a "no image"
        image, False if not
        """

        # if image name is "no_image", return True
        if re.match(".*no.image\..*", url):
            return True
        else:
            return Scraper._no_image(self, url)
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # variants helper object
        self.mv = MacysVariants()

        # product info JSON and its "checked" flag
        self.product_info_json = self.price_json = None
        self.is_product_info_json_checked = False
        self.is_bundle = False

        # review-related state
        self.review_json = self.reviews_tree = self.reviews = None
        self.average_review = self.review_count = None
        self.max_score = self.min_score = None
        self.is_review_checked = False

        # features
        self.features = self.feature_count = None

        # media
        self.video_urls = self.video_count = None
        self.pdf_urls = self.pdf_count = None
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set initial attribute values.

        ``kwargs`` are forwarded untouched (presumably ``url`` and ``bot``).
        """
        Scraper.__init__(self, **kwargs)

        # variants helper object
        self.kv = KohlsVariants()

        self.failure_type = None

        # review data and its "checked" flag
        self.review_json = self.review_list = None
        self.is_review_checked = False

        # variants and their "checked" flag
        self.variants, self.is_variant_checked = None, False

        # product info JSON and its "checked" flag
        self.product_info_json = None
        self.is_product_info_json_checked = False

        # webcollage-related state
        self.wc_360 = self.wc_emc = self.wc_video = 0
        self.wc_pdf = self.wc_prodtour = 0
        self.is_webcollage_contents_checked = False

        # videos
        self.video_urls = []
        self.is_video_checked = False
Пример #29
0
 def __init__(self, **kwargs):# **kwargs are presumably (url, bot)
     """Forward all keyword arguments to ``Scraper.__init__``; no extra state is set here."""
     Scraper.__init__(self, **kwargs)
Пример #30
0
    def __init__(self, **kwargs):
        """Call ``Scraper.__init__`` with ``kwargs`` and set the product-data state."""
        Scraper.__init__(self, **kwargs)

        # product data holder and its "checked" flag
        self.product_data, self.product_data_checked = None, False