Exemplo n.º 1
0
    def parse(self, response):
        """Yield one firmware item per data row of the download table.

        Two page layouts are handled: a table with a fixed, known id
        (product, revision and link in columns 1, 3 and 4) and, when that
        table is absent, five levels of nested tables (product, link and
        revision in columns 1, 2 and 4).  The first row of the selected
        table is never considered.  Rows that lack any of the three cells
        are skipped instead of raising IndexError, which would otherwise
        abort the remaining rows of the page.
        """
        rec_table = ("//table[@id='ctl00_ctl00_ContentPlaceHolderMain_"
                     "ContentPlaceHolderSupportMiddle_Table_REC']")

        if response.xpath(rec_table):
            rows = response.xpath(rec_table + "/tr[position() > 1]")
            product_col, rev_col, href_col = 1, 3, 4
        else:
            rows = response.xpath(
                "//table//table//table//table//table//tr[position() > 1]")
            product_col, rev_col, href_col = 1, 4, 2

        for row in rows:
            product = row.xpath(".//td[%d]//text()" % product_col).extract()
            rev = row.xpath(".//td[%d]//text()" % rev_col).extract()
            href = row.xpath(".//td[%d]//a/@href" % href_col).extract()

            # a row without product text, revision text or a link cannot
            # describe a download; skip it and keep going
            if not (product and rev and href):
                continue

            item = FirmwareLoader(item=FirmwareImage(), response=response)
            item.add_value("version", rev[0])
            item.add_value("url", SupermicroSpider.fix_url(href[0]))
            item.add_value("product", product[0])
            item.add_value("vendor", self.name)
            yield item.load_item()
Exemplo n.º 2
0
    def parse_product(self, response):
        """Yield firmware items found in the rows of a product download table.

        Rows are visited bottom-up.  A row whose version cell contains a
        <select> lists several releases side by side (type, date, version
        and download link at matching positions, one per <option>); any
        other row describes a single download.  When a single-download row
        mentions "mib" rather than "firmware" in its type cell, its link is
        remembered and attached to the firmware items yielded afterwards.

        Nothing is yielded for an empty response body.  The product name is
        read from response.meta["product"].
        """
        mib = None

        if not response.body:
            return

        for entry in reversed(response.xpath("//table/tbody/tr")):
            if entry.xpath("./td[contains(@class, 'versionTd')]/select"):
                # evaluate every query once per row instead of once per
                # <option>; the lists are then indexed by option position
                options = entry.xpath(
                    "./td[contains(@class, 'versionTd')]/select/option")
                descs = entry.xpath(
                    "./td[contains(@class, 'typeTd')]/span/text()").extract()
                dates = entry.xpath(
                    "./td[contains(@class, 'dateTd')]/span/text()").extract()
                versions = entry.xpath(
                    "./td[contains(@class, 'versionTd')]/select/option/text()").extract()
                hrefs = entry.xpath(
                    "./td[contains(@class, 'downloadTd')]/div/a[1]/@data-filelink").extract()

                for i in range(len(options)):
                    if "firmware" not in descs[i].lower():
                        continue

                    date = dates[i]
                    ver = versions[i]
                    href = hrefs[i]

                    item = FirmwareLoader(
                        item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
                    item.add_value("version", ver)
                    item.add_value("date", date)
                    item.add_value("url", href)
                    item.add_value("product", response.meta["product"])
                    item.add_value("mib", mib)
                    item.add_value("vendor", self.name)
                    yield item.load_item()

            else:
                # the second text node of the type cell carries the label
                desc = entry.xpath(
                    "./td[contains(@class, 'typeTd')]//text()").extract()[1].lower()

                if "firmware" in desc:
                    # date and version are handed to the loader as the full
                    # lists of text nodes
                    date = entry.xpath(
                        "./td[contains(@class, 'dateTd')]//text()").extract()
                    ver = entry.xpath(
                        "./td[contains(@class, 'versionTd')]//text()").extract()
                    href = entry.xpath(
                        "./td[contains(@class, 'downloadTd')]//a/@data-filelink").extract()[0]

                    item = FirmwareLoader(
                        item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
                    item.add_value("version", ver)
                    item.add_value("date", date)
                    item.add_value("url", href)
                    item.add_value("product", response.meta["product"])
                    item.add_value("mib", mib)
                    item.add_value("vendor", self.name)
                    yield item.load_item()

                elif "mib" in desc:
                    mib = entry.xpath(
                        "./td[contains(@class, 'downloadTd')]//a/@href").extract()[0]
Exemplo n.º 3
0
    def parse_product(self, response):
        """Yield firmware items found in the rows of a product download table.

        Rows are walked from the last to the first.  A row whose version
        cell holds a <select> carries several releases in parallel lists
        (type, date, version, download link), one position per <option>;
        every other row is a single download.  A single-download row whose
        type cell mentions "mib" instead of "firmware" only updates the MIB
        link that is attached to firmware items yielded later.

        An empty response body produces no items.  The product name comes
        from response.meta["product"].
        """
        mib = None

        if not response.body:
            return

        for entry in reversed(response.xpath("//table/tbody/tr")):
            if entry.xpath("./td[contains(@class, 'versionTd')]/select"):
                # run each XPath query once per row rather than once per
                # <option>, then index the resulting lists by position
                option_count = len(entry.xpath(
                    "./td[contains(@class, 'versionTd')]/select/option"))
                type_texts = entry.xpath(
                    "./td[contains(@class, 'typeTd')]/span/text()").extract()
                date_texts = entry.xpath(
                    "./td[contains(@class, 'dateTd')]/span/text()").extract()
                version_texts = entry.xpath(
                    "./td[contains(@class, 'versionTd')]/select/option/text()").extract()
                file_links = entry.xpath(
                    "./td[contains(@class, 'downloadTd')]/div/a[1]/@data-filelink").extract()

                for i in range(option_count):
                    desc = type_texts[i].lower()

                    if "firmware" in desc:
                        date = date_texts[i]
                        ver = version_texts[i]
                        href = file_links[i]

                        item = FirmwareLoader(
                            item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
                        item.add_value("version", ver)
                        item.add_value("date", date)
                        item.add_value("url", href)
                        item.add_value("product", response.meta["product"])
                        item.add_value("mib", mib)
                        item.add_value("vendor", self.name)
                        yield item.load_item()

            else:
                # the label is taken from the second text node of the cell
                desc = entry.xpath(
                    "./td[contains(@class, 'typeTd')]//text()").extract()[1].lower()

                if "firmware" in desc:
                    # the loader receives all text nodes of the date and
                    # version cells, not a single string
                    date = entry.xpath(
                        "./td[contains(@class, 'dateTd')]//text()").extract()
                    ver = entry.xpath(
                        "./td[contains(@class, 'versionTd')]//text()").extract()
                    href = entry.xpath(
                        "./td[contains(@class, 'downloadTd')]//a/@data-filelink").extract()[0]

                    item = FirmwareLoader(
                        item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
                    item.add_value("version", ver)
                    item.add_value("date", date)
                    item.add_value("url", href)
                    item.add_value("product", response.meta["product"])
                    item.add_value("mib", mib)
                    item.add_value("vendor", self.name)
                    yield item.load_item()

                elif "mib" in desc:
                    mib = entry.xpath(
                        "./td[contains(@class, 'downloadTd')]//a/@href").extract()[0]
Exemplo n.º 4
0
    def parse_json(self, response):
        """Handle every stage of the JSON download API with a single callback.

        * A payload containing "products" schedules one request per product
          slug.
        * A response whose meta carries "url" answers an SDK lookup: the
          firmware details travel in meta and the payload supplies the SDK
          link ("download_url").
        * A response whose meta carries "product" (but no "url") lists that
          product's downloads; firmware entries with an SDK id trigger an
          SDK lookup, the others are yielded immediately.
        """
        payload = json.loads(response.body_as_unicode())
        meta = response.meta

        if "products" in payload:
            for product in payload["products"]:
                slug = product["slug"]
                yield Request(
                    url=urlparse.urljoin(response.url, "?product=%s" % (slug)),
                    headers={"Referer": response.url, "X-Requested-With": "XMLHttpRequest"},
                    meta={"product": slug},
                    callback=self.parse_json,
                )

        if "url" in meta:
            # final step: everything except the SDK link was gathered earlier
            loader = FirmwareLoader(item=FirmwareImage(), response=response, date_fmt=["%Y-%m-%d"])
            loader.add_value("url", meta["url"])
            loader.add_value("product", meta["product"])
            loader.add_value("date", meta["date"])
            loader.add_value("description", meta["description"])
            loader.add_value("build", meta["build"])
            loader.add_value("version", meta["version"])
            loader.add_value("sdk", payload["download_url"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
            return

        if "product" not in meta:
            return

        for entry in payload["downloads"]:
            if entry["category__slug"] != "firmware":
                continue

            if entry["sdk__id"]:
                # fetch the SDK link first; the item is built by the callback
                yield Request(
                    url=urlparse.urljoin(response.url, "?gpl=%s&eula=True" % (entry["sdk__id"])),
                    headers={"Referer": response.url, "X-Requested-With": "XMLHttpRequest"},
                    meta={
                        "product": meta["product"],
                        "date": entry["date_published"],
                        "build": entry["build"],
                        "url": entry["file_path"],
                        "version": entry["version"],
                        "description": entry["name"],
                    },
                    callback=self.parse_json,
                )
                continue

            loader = FirmwareLoader(item=FirmwareImage(), response=response, date_fmt=["%Y-%m-%d"])
            loader.add_value("url", entry["file_path"])
            loader.add_value("product", meta["product"])
            loader.add_value("date", entry["date_published"])
            loader.add_value("description", entry["name"])
            loader.add_value("build", entry["build"])
            loader.add_value("version", entry["version"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 5
0
    def parse_product(self, response):
        """Yield firmware and GPL source items for one product page.

        When the second-to-last URL segment does not start with 'v', the
        hardware-version links on the page are read: the first one supplies
        the version label for this page and every remaining one is followed
        with this same callback.  Firmware tables under "#content_Firmware"
        and links under "#content_GPL-Code" are then turned into items.

        Product and category are taken from response.meta.
        """
        self.logger.debug("Parsing %s...", response.url)
        url_segment = response.url.split('/')[-2]

        # NOTE(review): when the segment starts with 'v' the version label
        # stays empty - confirm that this is intended.
        version = ""
        if url_segment[0] != 'v':
            version_links = response.css(
                "div.hardware-version dl.select-version li a::attr(href)"
            ).extract()
            if version_links:
                version = version_links[0].split('/')[-2]
                for other in version_links[1:]:
                    yield response.follow(other,
                                          meta=response.meta,
                                          callback=self.parse_product)

        firmware_tables = response.css("#content_Firmware > table")
        self.logger.debug("%s %s: %d binary firmware found.",
                          response.meta["product"], version,
                          len(firmware_tables))
        for table in firmware_tables:
            details = table.css('tr.detail-info span')
            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%y-%m-%d"])
            loader.add_value("vendor", self.vendor)
            loader.add_value("url", table.css("a::attr(href)").get())
            # spans at positions 1, 3 and 5 hold date, language and size
            loader.add_value("date", details[1].css("::text").get().strip())
            loader.add_value("language", details[3].css("::text").get().strip())
            loader.add_value("size", details[5].css("::text").get().strip())
            notes = table.css('td.more p').getall()
            loader.add_value("description", "\n".join(notes))
            loader.add_value("product", response.meta["product"])
            loader.add_value("category", response.meta["category"])
            loader.add_value("version", version)
            yield loader.load_item()

        source_links = response.css("#content_GPL-Code a")
        self.logger.debug("%s %s: %d gpl source code found.",
                          response.meta["product"], version,
                          len(source_links))
        for anchor in source_links:
            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%d/%m/%y"])
            loader.add_value("vendor", self.vendor)
            loader.add_value("url", anchor.css("a::attr(href)").get())
            loader.add_value("product", response.meta["product"])
            loader.add_value("category", response.meta["category"])
            loader.add_value("version", version)
            yield loader.load_item()
Exemplo n.º 6
0
    def parse(self, response):
        """Yield firmware items, or follow-up requests, for a support page.

        A page containing the "UCagreement" form is a licence page: every
        quoted string inside the "productAndDoc" block that mentions
        downloads.polycom.com becomes an item described by response.meta.
        Otherwise the list entries under "ContentChannel" are examined:
        documentation-like links are ignored, links to HTML pages are
        followed with this same callback, and any other link with a path
        is yielded as an item.
        """
        if response.xpath("//form[@name='UCagreement']"):
            block = response.xpath("//div[@id='productAndDoc']").extract()[0]
            # splitting the raw markup on quotes exposes attribute values
            for href in block.split('"'):
                if "downloads.polycom.com" not in href:
                    continue

                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%B %d, %Y"])
                loader.add_value("version", response.meta["version"])
                loader.add_value("url", href.encode("utf-8"))
                loader.add_value("date", response.meta["date"])
                loader.add_value("description", response.meta["description"])
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
            return

        if not response.xpath("//div[@id='ContentChannel']"):
            return

        ignored_words = ["end user license agreement", "eula", "release notes",
                         "mac os", "windows", "guide", "(pdf)", "sample"]

        for entry in response.xpath("//div[@id='ContentChannel']//li"):
            if not entry.xpath("./a"):
                continue

            text = entry.xpath("./a//text()").extract()[0]
            href = entry.xpath("./a/@href").extract()[0].strip()
            date = entry.xpath("./span//text()").extract()

            path = urlparse.urlparse(href).path
            lowered = text.lower()

            if href.endswith(".pdf") or any(word in lowered for word in ignored_words):
                continue

            if path.endswith((".htm", ".html")) or "(html)" in lowered:
                # intermediate page: carry the description along and recurse
                if "product" in response.meta:
                    product = response.meta["product"]
                else:
                    product = text
                yield Request(
                    url=urlparse.urljoin(
                        response.url, PolycomSpider.fix_url(href)),
                    meta={"product": product,
                          "date": date,
                          "version": FirmwareLoader.find_version_period([text]),
                          "description": text},
                    headers={"Referer": response.url},
                    callback=self.parse)

            elif path:
                # NOTE: no "product" value is added for directly linked files
                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%B %d, %Y"])
                loader.add_value(
                    "version", FirmwareLoader.find_version_period([text]))
                loader.add_value("url", href.encode("utf-8"))
                loader.add_value("date", loader.find_date(date))
                loader.add_value("description", text)
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 7
0
    def parse_json(self, response):
        """Drive the JSON download API; this callback serves all three stages.

        1. If the payload has "products", request the download list of each
           product slug.
        2. If response.meta has "url", this is the reply to an SDK lookup:
           build the item from meta plus the payload's "download_url".
        3. Else, if response.meta has "product", walk the payload's
           "downloads": firmware entries with an SDK id schedule an SDK
           lookup, firmware entries without one are yielded right away.
        """
        data = json.loads(response.body_as_unicode())
        context = response.meta

        if "products" in data:
            for product in data["products"]:
                slug = product["slug"]
                yield Request(
                    url=urlparse.urljoin(response.url, "?product=%s" % (slug)),
                    headers={"Referer": response.url,
                             "X-Requested-With": "XMLHttpRequest"},
                    meta={"product": slug},
                    callback=self.parse_json)

        if "url" in context:
            fw = FirmwareLoader(item=FirmwareImage(),
                                response=response, date_fmt=["%Y-%m-%d"])
            fw.add_value("url", context["url"])
            fw.add_value("product", context["product"])
            fw.add_value("date", context["date"])
            fw.add_value("description", context["description"])
            fw.add_value("build", context["build"])
            fw.add_value("version", context["version"])
            fw.add_value("sdk", data["download_url"])
            fw.add_value("vendor", self.name)
            yield fw.load_item()
            return

        if "product" not in context:
            return

        for entry in data["downloads"]:
            if entry["category__slug"] != "firmware":
                continue

            if not entry["sdk__id"]:
                # no SDK to look up, the entry is complete as it stands
                fw = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%Y-%m-%d"])
                fw.add_value("url", entry["file_path"])
                fw.add_value("product", context["product"])
                fw.add_value("date", entry["date_published"])
                fw.add_value("description", entry["name"])
                fw.add_value("build", entry["build"])
                fw.add_value("version", entry["version"])
                fw.add_value("vendor", self.name)
                yield fw.load_item()
                continue

            # the firmware details ride along in meta until the SDK reply
            yield Request(
                url=urlparse.urljoin(
                    response.url, "?gpl=%s&eula=True" % (entry["sdk__id"])),
                headers={"Referer": response.url,
                         "X-Requested-With": "XMLHttpRequest"},
                meta={"product": context["product"],
                      "date": entry["date_published"],
                      "build": entry["build"],
                      "url": entry["file_path"],
                      "version": entry["version"],
                      "description": entry["name"]},
                callback=self.parse_json)
Exemplo n.º 8
0
    def parse_url(self, response):
        """Walk a directory listing, descending into folders and yielding
        firmware files.

        Links containing ".." are ignored.  A link ending in '/' is a
        folder: unless its text contains "package/", it is requested with
        this same callback and its name (text minus the trailing character)
        is appended to the product carried in response.meta.  A link ending
        in a known firmware extension becomes an item whose date is searched
        for in the text following the link.

        Anchors that have no href or no text node are skipped; indexing
        their empty extract() result would raise IndexError and stop the
        walk of the whole listing.
        """
        firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for link in response.xpath("//a"):
            texts = link.xpath("text()").extract()
            hrefs = link.xpath("@href").extract()
            if not texts or not hrefs:
                continue
            text, href = texts[0], hrefs[0]

            if ".." in href:
                continue
            elif href.endswith('/'):
                if "package/" not in text:
                    # drop the trailing character of the link text
                    folder = text[0: -1]
                    if "product" in response.meta:
                        product = "%s-%s" % (response.meta["product"], folder)
                    else:
                        product = folder

                    yield Request(
                        url=urlparse.urljoin(response.url, href),
                        headers={"Referer": response.url},
                        meta={"version": response.meta["version"],
                              "product": product},
                        callback=self.parse_url)
            elif href.endswith(firmware_suffixes):
                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
                item.add_value("version", response.meta["version"])
                item.add_value("url", href)
                item.add_value("date", item.find_date(
                    link.xpath("following::text()").extract()))
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 9
0
    def parse_download(self, response):
        """Yield one item per record listed in download category bucket 7.

        The product name is the page header with " Support & Drivers"
        removed.  Date and version are both searched for in the same
        "dateVersion" text, and "file-download" in the link is replaced by
        "file-redirect".
        """
        records = response.xpath(
            "//li[@class='categoryBucket categoryBucketId-7']//li[@class='record ']"
        )
        date_version_xpath = ".//ul[@class='dateVersion']//strong/text()"

        for record in records:
            header = response.xpath(
                "//div[@class='prodNavHeaderBody']//text()").extract()[0]
            product = header.replace(" Support & Drivers", "")

            # the same nodes feed both the date and the version search
            date = record.xpath(date_version_xpath).extract()
            version = record.xpath(date_version_xpath).extract()

            link = record.xpath(".//a/@href").extract()[0]
            href = link.replace("file-download", "file-redirect")
            text = record.xpath(".//a//text()").extract()[0]

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%b %d, %Y"])
            loader.add_value("url", href)
            loader.add_value("product", product)
            loader.add_value("date", loader.find_date(date))
            loader.add_value("description", text)
            loader.add_value("version", loader.find_version_period(version))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 10
0
    def parse_kb(self, response):
        """Yield firmware items from the download section of a support
        article.

        Releases are not clearly separated in the markup, so every
        paragraph is cut at double line breaks and each piece is parsed as
        a document of its own.  Paragraphs and pieces are walked backwards
        so that a MIB link is seen before the firmware items it is attached
        to.
        """
        mib = None

        paragraphs = response.xpath(
            "//div[@id='support-article-downloads']/div/p")
        for paragraph in reversed(paragraphs):
            pieces = paragraph.extract().split("<br><br>")
            for piece in reversed(pieces):
                resp = HtmlResponse(url=response.url,
                                    body=piece,
                                    encoding=response.encoding)
                for href in resp.xpath("//a/@href").extract():
                    if "MIBs" in href:
                        mib = href
                        continue
                    if "firmware" not in href:
                        continue

                    text = resp.xpath("//text()").extract()

                    loader = FirmwareLoader(item=FirmwareImage(),
                                            response=resp,
                                            date_fmt=["%m/%d/%Y"])
                    loader.add_value("date", loader.find_date(text))
                    # NOTE(review): this collects every link of the piece,
                    # not just the current href - confirm which one the
                    # loader keeps.
                    loader.add_xpath("url", "//a/@href")
                    loader.add_value("mib", mib)
                    loader.add_value("product", response.meta["product"])
                    loader.add_value("vendor", self.name)
                    loader.add_value(
                        "version",
                        FirmwareLoader.find_version_period(text))
                    yield loader.load_item()
Exemplo n.º 11
0
    def parse_url(self, response):
        """Recursively crawl a directory listing and yield firmware files.

        For every anchor on the page: links containing ".." are ignored;
        links ending in '/' are folders and are requested with this same
        callback (unless the link text contains "package/"), extending the
        product name in response.meta with the folder name; links ending in
        a known firmware extension are yielded as items, with the date
        searched for in the text that follows the link.

        Anchors lacking an href or a text node are skipped, because
        indexing their empty extract() result would raise IndexError and
        end the crawl of the listing.
        """
        firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for link in response.xpath("//a"):
            texts = link.xpath("text()").extract()
            hrefs = link.xpath("@href").extract()
            if not (texts and hrefs):
                continue
            text = texts[0]
            href = hrefs[0]

            if ".." in href:
                continue
            elif href.endswith('/'):
                if "package/" not in text:
                    # the link text minus its trailing character
                    folder = text[0:-1]
                    if "product" in response.meta:
                        product = "%s-%s" % (response.meta["product"], folder)
                    else:
                        product = folder

                    yield Request(url=urlparse.urljoin(response.url, href),
                                  headers={"Referer": response.url},
                                  meta={
                                      "version": response.meta["version"],
                                      "product": product
                                  },
                                  callback=self.parse_url)
            elif href.endswith(firmware_suffixes):
                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%d-%b-%Y"])
                item.add_value("version", response.meta["version"])
                item.add_value("url", href)
                item.add_value(
                    "date",
                    item.find_date(link.xpath("following::text()").extract()))
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 12
0
    def parse_product(self, response):
        """Yield one item per firmware entry (download type 20) of a product.

        For each entry the version is searched for in its heading text and
        the first download link of the third table row is used.  When a
        version was found, the GPL source entries (type 30) are scanned for
        one whose heading contains it; its link is stored as "sdk".
        """
        # types: firmware = 20, gpl source = 30, bios = 3
        firmware_entries = response.xpath(
            "//div[@id='div_type_20']/div[@id='download-os-answer-table']")

        for entry in firmware_entries:
            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response, date_fmt=["%Y/%m/%d"])

            heading = entry.xpath("./p//text()").extract()
            version = FirmwareLoader.find_version_period(heading)
            gpl = None

            # grab first download link (e.g. DLM instead of global or p2p)
            href = entry.xpath("./table//tr[3]//a/@href").extract()[0]

            # attempt to find matching source code entry; every source
            # entry is examined, so the last match is the one kept
            if version:
                source_entries = response.xpath(
                    "//div[@id='div_type_30']/div[@id='download-os-answer-table']")
                for source in source_entries:
                    source_heading = "".join(
                        source.xpath("./p//text()").extract())
                    if version in source_heading:
                        gpl = source.xpath(
                            "./table//tr[3]//a/@href").extract()[0]

            date_cell = entry.xpath("./table//tr[2]/td[1]//text()").extract()
            description_cell = entry.xpath(
                "./table//tr[1]//td[1]//text()").extract()

            loader.add_value("version", version)
            loader.add_value("date", loader.find_date(date_cell))
            loader.add_value("description", " ".join(description_cell))
            loader.add_value("url", href)
            loader.add_value("sdk", gpl)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 13
0
    def parse_model_files(self, response):
        """Yield one item per firmware file listed in a model's JSON reply.

        The reply is expected to hold the files under
        ["downloads"]["firmware"]; when either key is missing, this is
        logged and nothing is yielded.  The 'global' download link is used
        and is resolved against "https://" when it carries no http(s)
        scheme.  The product name comes from response.meta['name'].
        """
        meta = response.meta

        # Due to Python2 and unicode objects, we're using response body here.
        # Issues are from the 'remarks' fields.
        try:
            model_files = json.loads(response.body)['downloads']['firmware']
        except KeyError:
            logging.info("No downloadable firmware for %s", meta)
            return

        # only the values are needed; the keys are not used
        for fw_info in model_files.itervalues():
            # options: {'global', 'europe', 'usa'}
            href = fw_info['links']['global']
            if not href.startswith((u"https://", u"http://")):
                href = urlparse.urljoin(u"https://", href)

            # date formats are handed over as a list; a bare string would
            # presumably be iterated character by character - TODO confirm
            # against FirmwareLoader
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y-%m-%d"])
            item.add_value('product', meta['name'])
            item.add_value('vendor', self.name)
            item.add_value('description', fw_info['releasenote'])
            item.add_value('date', fw_info['published_at'])
            item.add_value('version', fw_info['version'])
            item.add_value('url', href)
            yield item.load_item()
Exemplo n.º 14
0
    def parse_kb(self, response):
        """Yield firmware items from a support article's download section.

        The markup does not clearly separate firmware versions, so each
        paragraph is split at "<br><br>" and every segment is re-parsed as
        a small HTML document.  Both paragraphs and segments are processed
        in reverse so that the MIB link is known before the firmware items
        that reference it are built.
        """
        mib = None

        download_paragraphs = response.xpath(
            "//div[@id='support-article-downloads']/div/p")

        for entry in reversed(download_paragraphs):
            segments = entry.extract().split("<br><br>")
            for segment in reversed(segments):
                resp = HtmlResponse(
                    url=response.url, body=segment, encoding=response.encoding)

                for href in resp.xpath("//a/@href").extract():
                    if "MIBs" in href:
                        mib = href
                    elif "firmware" in href:
                        segment_text = resp.xpath("//text()").extract()

                        fw = FirmwareLoader(
                            item=FirmwareImage(), response=resp, date_fmt=["%m/%d/%Y"])
                        fw.add_value("date", fw.find_date(segment_text))
                        # NOTE(review): selects all links of the segment
                        # rather than the current href - verify which value
                        # ends up in the item.
                        fw.add_xpath("url", "//a/@href")
                        fw.add_value("mib", mib)
                        fw.add_value("product", response.meta["product"])
                        fw.add_value("vendor", self.name)
                        fw.add_value(
                            "version",
                            FirmwareLoader.find_version_period(segment_text))
                        yield fw.load_item()
Exemplo n.º 15
0
    def parse_kb(self, response):
        """Yield firmware items from the rich-text body of a support article.

        The article markup is split into regions, each region is re-parsed
        as its own HTML document, and every link in it whose href contains
        "cache-www" becomes an item.  The version is searched for in the
        text following the link first and, failing that, in the whole
        region; the date is searched for in the whole region.

        Anchors without an href attribute are skipped; indexing their empty
        extract() result would raise IndexError and abort the page.
        """
        # initial html tokenization to find regions segmented by e.g. "======"
        # or "------"
        # NOTE(review): the separator actually used is the literal "=-" -
        # confirm that this matches the pages being scraped.
        filtered = response.xpath(
            "//div[@class='sfdc_richtext']").extract()[0].split("=-")

        for entry in filtered:
            resp = HtmlResponse(url=response.url, body=entry.strip(),
                                encoding=response.encoding)

            for link in resp.xpath("//a"):
                hrefs = link.xpath("@href").extract()
                if not hrefs:
                    continue
                href = hrefs[0]
                if "cache-www" not in href:
                    continue

                text = resp.xpath("//text()").extract()
                text_next = link.xpath("following::text()").extract()

                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%b %d, %Y", "%B %d, %Y",
                                                "%m/%d/%Y"])

                # prefer a version mentioned after the link itself
                version = FirmwareLoader.find_version_period(text_next)
                if not version:
                    version = FirmwareLoader.find_version_period(text)

                item.add_value("version", version)
                item.add_value("date", item.find_date(text))
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 16
0
    def parse_product(self, response):
        """Yield an item for every firmware download block of a product page.

        Firmware blocks live under "div_type_20".  Each block's heading is
        searched for a version; if one is found, the GPL source blocks
        under "div_type_30" are scanned and the link of a block whose
        heading contains that version is recorded as "sdk".
        """
        # types: firmware = 20, gpl source = 30, bios = 3
        block_xpath = "//div[@id='div_type_%d']/div[@id='download-os-answer-table']"
        first_link_xpath = "./table//tr[3]//a/@href"

        for block in response.xpath(block_xpath % 20):
            fw = FirmwareLoader(item=FirmwareImage(),
                                response=response, date_fmt=["%Y/%m/%d"])

            version = FirmwareLoader.find_version_period(
                block.xpath("./p//text()").extract())
            gpl = None

            # grab first download link (e.g. DLM instead of global or p2p)
            href = block.xpath(first_link_xpath).extract()[0]

            # attempt to find matching source code entry; the scan does not
            # stop at the first hit, so a later match replaces an earlier one
            if version:
                for source in response.xpath(block_xpath % 30):
                    title = "".join(source.xpath("./p//text()").extract())
                    if version not in title:
                        continue
                    gpl = source.xpath(first_link_xpath).extract()[0]

            fw.add_value("version", version)
            fw.add_value("date", fw.find_date(
                block.xpath("./table//tr[2]/td[1]//text()").extract()))
            fw.add_value("description", " ".join(
                block.xpath("./table//tr[1]//td[1]//text()").extract()))
            fw.add_value("url", href)
            fw.add_value("sdk", gpl)
            fw.add_value("product", response.meta["product"])
            fw.add_value("vendor", self.name)
            yield fw.load_item()
Exemplo n.º 17
0
    def parse(self, response):
        """Crawl a tabular directory listing and yield firmware files.

        Only table rows with a link in their second cell are considered.
        Folder links are followed with this same callback: the first folder
        level entered supplies the product name, the next one the build
        (its name with "build" removed).  Links ending in a known firmware
        extension become items; the version is searched for in the
        dash-separated parts of the file name and the date in the third
        cell.
        """
        firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for row in response.xpath("//table//tr"):
            if not row.xpath("./td[2]/a"):
                continue

            text = row.xpath("./td[2]/a/text()").extract()[0]
            href = row.xpath("./td[2]//@href").extract()[0]

            if ".." in href:
                continue

            if href.endswith("/"):
                build = response.meta.get("build", None)
                product = response.meta.get("product", None)

                # fill in whichever level is still unknown, product first
                if not product:
                    product = text
                elif not build:
                    build = text.replace("build", "")

                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"build": build, "product": product},
                    callback=self.parse,
                )
            elif href.endswith(firmware_suffixes):
                name_parts = os.path.splitext(text)[0].split("-")
                date_cell = row.xpath("./td[3]/text()").extract()

                loader = FirmwareLoader(item=FirmwareImage(), response=response, date_fmt=["%Y-%m-%d"])
                loader.add_value("build", response.meta["build"])
                loader.add_value("url", href)
                loader.add_value("version", FirmwareLoader.find_version_period(name_parts))
                loader.add_value("date", loader.find_date(date_cell))
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 18
0
    def parse_model_page(self, response):
        """Yield a firmware item for each versioned firmware download entry.

        An accordion entry is used only when its title mentions 'Firmware'
        (but not 'Upgrade'), contains the word 'Version' followed by a version
        token, and its first link is a direct download (not a confirmation
        anchor or a knowledge-base article).

        :param response: model page; ``response.meta["product"]`` names the
            product.
        :returns: generator of loaded firmware items.
        """
        for entry in response.xpath(
                "//section[@id='topicsdownload']//div[@class='col topic']/"
                "section[@class='box articles']//div[@class='accordion-item']"
        ):
            name = entry.xpath(
                "./a[@class='accordion-title']/h1/text()").extract_first()
            url = entry.xpath(
                "./div[@class='accordion-content']//a/@href").extract_first()

            # extract_first() gives None for entries lacking a title or link;
            # the substring tests below would raise TypeError on None
            if not name or not url:
                continue

            if '#confirm-download-' in url or 'http://kb.netgear.com/' in url:
                continue

            if 'Firmware' not in name or 'Upgrade' in name:
                continue

            name_split = name.split(" ")
            # only continue if there is a version number; list.index() raises
            # ValueError when the word is absent, so test membership first
            if 'Version' not in name_split:
                continue
            index = name_split.index('Version')
            if index + 1 >= len(name_split):
                # 'Version' is the last word, nothing follows it
                continue
            version = name_split[index + 1]

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response)
            item.add_value("version", version)
            item.add_value("url", url)
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()
Exemplo n.º 19
0
    def parse_kb(self, response):
        """Scrape firmware download links out of a knowledge-base article.

        The article's rich-text HTML is cut into regions at every literal
        "=-" marker (presumably the seam between "=====" and "-----" separator
        rows -- confirm against a live page). Each region is parsed on its own
        so the version and date found in it are attributed to its links.

        :param response: article page; ``response.meta["product"]`` names the
            product.
        :returns: generator of loaded firmware items, one per link whose href
            contains "cache-www".
        """
        richtext = response.xpath("//div[@class='sfdc_richtext']").extract()
        if not richtext:
            # page has no article body; indexing [0] would raise IndexError
            return

        for entry in richtext[0].split("=-"):
            entry = entry.strip()
            if not entry:
                continue

            resp = HtmlResponse(url=response.url,
                                body=entry,
                                encoding=response.encoding)

            for link in resp.xpath("//a"):
                hrefs = link.xpath("@href").extract()
                # anchors without an href (e.g. <a name="...">) are not links
                if not hrefs or "cache-www" not in hrefs[0]:
                    continue
                href = hrefs[0]

                text = resp.xpath("//text()").extract()
                text_next = link.xpath("following::text()").extract()

                item = FirmwareLoader(
                    item=FirmwareImage(),
                    response=response,
                    date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

                # prefer a version that follows the link, then fall back to
                # anything found in the whole region
                version = FirmwareLoader.find_version_period(text_next)
                if not version:
                    version = FirmwareLoader.find_version_period(text)

                item.add_value("version", version)
                item.add_value("date", item.find_date(text))
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 20
0
    def parse_product(self, response):
        """Extract firmware entries from a ``var commonInfo`` JavaScript payload.

        The response body is scanned with one regex per field and the matches
        are paired up positionally.

        :param response: response whose ``text`` is the JavaScript source.
        :returns: generator of loaded firmware items; empty when the body does
            not start with ``var commonInfo``.
        """
        import re

        js = response.text
        if not js.startswith("var commonInfo"):
            return

        p_product = u"id:\"(?P<product>.*?)\""
        p_description = u"title:\"(?P<description>.*?)\""
        p_version = u"romVersions:\"(?P<version>.*?)\""
        p_url = u"romUrl:\"(?P<url>.*?)\""
        p_date = u"updateDate:\"(?P<date>.*?)\""

        # products must come from the id pattern; the version pattern was
        # previously used here, which filled "product" with version strings
        products = re.findall(p_product, js)
        descriptions = re.findall(p_description, js)
        versions = re.findall(p_version, js)
        urls = re.findall(p_url, js)
        dates = re.findall(p_date, js)

        # NOTE(review): assumes every entry carries all five fields so the
        # lists line up by position -- confirm against a live payload. zip()
        # stops at the shortest list instead of raising IndexError.
        for product, description, version, url, date in zip(
                products, descriptions, versions, urls, dates):
            item = FirmwareLoader(
                        item=FirmwareImage(), response=response)
            item.add_value("url", url)
            item.add_value("product", product)
            item.add_value("version", version)
            item.add_value("description", description)
            item.add_value("date", date)
            item.add_value("vendor", self.name)
            yield item.load_item()
Exemplo n.º 21
0
    def parse_product(self, response):
        """Scrape a product support page, fanning out per hardware build first.

        When the page offers a hardware-version drop-down and no build has
        been chosen yet, one request per drop-down option is issued back to
        this callback with "build" set in the meta. Otherwise the firmware
        tables of the page are turned into items.
        """
        if response.xpath("//dl[@id='dlDropDownBox']") and "build" not in response.meta:
            for option in response.xpath("//dl[@id='dlDropDownBox']//li/a"):
                target = option.xpath("./@href").extract()[0]
                label = option.xpath(".//text()").extract()[0]

                yield Request(
                    url=urlparse.urljoin(response.url, target),
                    meta={"product": response.meta["product"], "build": label},
                    headers={"Referer": response.url},
                    callback=self.parse_product,
                )
            return

        # the first GPL source link on the page (if any) is used as the SDK
        gpl_links = response.xpath("//div[@id='content_gpl_code']//a/@href").extract()
        sdk = gpl_links[0] if gpl_links else None

        for table in response.xpath("//div[@id='content_firmware']//table"):
            link = table.xpath("./tbody/tr[1]/th[1]//a/@href").extract()[0]
            label = table.xpath("./tbody/tr[1]/th[1]//a//text()").extract()[0]
            date_cells = table.xpath("./tbody/tr[1]/td[1]//text()").extract()

            loader = FirmwareLoader(item=FirmwareImage(), response=response, date_fmt=["%d/%m/%y"])
            loader.add_value("url", link)
            loader.add_value("date", loader.find_date(date_cells))
            loader.add_value("description", label)
            loader.add_value("product", response.meta["product"])
            loader.add_value("build", response.meta.get("build"))
            loader.add_value("vendor", self.vendor)
            loader.add_value("sdk", sdk)
            yield loader.load_item()
Exemplo n.º 22
0
    def parse(self, response):
        """Walk the download menu, following category links and emitting firmware.

        Links that are not downloads are followed with this same callback and
        their label (split on spaces) becomes the product; download links
        whose label mentions firmware are turned into items.
        """
        cells = response.xpath(
            "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]")
        for cell in cells:
            cell_text = cell.xpath(".//text()").extract()

            for anchor in cell.xpath(".//a"):
                target = anchor.xpath("./@href").extract()[0]
                label = anchor.xpath(".//text()").extract()[0]

                if "_a=download" not in target:
                    # navigation link: descend into it
                    yield Request(url=urllib.parse.urljoin(response.url, target),
                                  headers={"Referer": response.url},
                                  meta={"product": label.strip().split(' ')},
                                  callback=self.parse)
                    continue

                lowered = label.lower()
                if not ("firmware" in lowered or "f/w" in lowered):
                    continue

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%m/%d/%Y", "%m/%d/%y"])
                loader.add_value("version",
                                 FirmwareLoader.find_version(cell_text))
                loader.add_value("date", loader.find_date(cell_text))
                loader.add_value("description", label)
                loader.add_value("url", target)
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 23
0
    def parse_product_firmware(self, response):
        """Yield one firmware item per download button on a product page.

        The item date is the page's "update date" badge when one is present,
        otherwise its "create date" badge.

        :param response: product page; ``response.meta["product"]`` names the
            product.
        :returns: generator of loaded firmware items.
        """
        # Get product name
        product = response.meta["product"]

        # Get the product last updated date
        create_date = ''
        for li_elem in response.xpath('//li'):
            if li_elem.xpath('@class').re(r'(\[hide_empty:create_date\])'):
                create_date = li_elem.xpath(
                    './/span[@class="badge"]/text()').extract_first()
            elif li_elem.xpath('@class').re(r'(\[hide_empty:update_date\])'):
                update_date = li_elem.xpath(
                    './/span[@class="badge"]/text()').extract_first()
                break
        else:
            # for/else: no update_date element was found, fall back
            update_date = create_date

        # File list table of downloads
        file_table = response.xpath(
            '//table[@class="wpdm-filelist table table-hover"]')
        for dl_button in file_table.xpath(
                './/a[@class="inddl btn btn-primary btn-sm"]'):
            href = dl_button.xpath("@href")

            # date_fmt is given as a list, like every other FirmwareLoader in
            # this file; a bare string was passed here before
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%B %d, %Y"])
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            item.add_value("date", update_date)
            item.add_value("url", href.extract_first())

            yield item.load_item()
Exemplo n.º 24
0
    def parse(self, response):
        """Traverse the download menu and emit firmware items.

        Every anchor inside a menu cell is either a navigation link (followed
        recursively, its label split on spaces becoming the product) or a
        download link, which is kept only if its label mentions firmware.
        """
        menu_xpath = "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]"

        for block in response.xpath(menu_xpath):
            block_text = block.xpath(".//text()").extract()

            for a in block.xpath(".//a"):
                url = a.xpath("./@href").extract()[0]
                caption = a.xpath(".//text()").extract()[0]
                is_download = "_a=download" in url

                if not is_download:
                    yield Request(
                        url=urlparse.urljoin(response.url, url),
                        headers={"Referer": response.url},
                        meta={"product": caption.strip().split(' ')},
                        callback=self.parse)
                elif any(tag in caption.lower() for tag in ("firmware", "f/w")):
                    fw = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%m/%d/%Y", "%m/%d/%y"])
                    fw.add_value("version", FirmwareLoader.find_version(block_text))
                    fw.add_value("date", fw.find_date(block_text))
                    fw.add_value("description", caption)
                    fw.add_value("url", url)
                    fw.add_value("product", response.meta["product"])
                    fw.add_value("vendor", self.name)
                    yield fw.load_item()
Exemplo n.º 25
0
 def parse(self, response):
     """Enumerate firmware detail pages by id and scrape each one.

     The first call (no "pid" in the meta) issues one request per candidate
     id 0..999. Each detail page then yields one item per row of its
     download table for every product name shown on the page.

     :param response: start page or a firmware detail page.
     :returns: generator of ``Request`` objects or loaded firmware items.
     """
     if "pid" not in response.meta:
         # bit ugly but it works :-)
         for pid in range(0, 1000):
             yield Request(url=urllib.parse.urljoin(
                 response.url, "firmware_details.html?id=%s" % pid),
                           meta={"pid": pid},
                           headers={
                               "Referer": response.url,
                               "X-Requested-With": "XMLHttpRequest"
                           },
                           callback=self.parse)
         return

     rows = response.xpath("//table[@class='down_table']//tr")
     for product in response.xpath(
             "//div[@class='download_list_icon']/span/text()").extract():
         # skip the table header
         for row in rows[1:]:
             version = row.xpath('td[1]//text()').extract_first()
             href = row.xpath('td[6]//a/@href').extract_first()
             # rows without a version cell or a download link cannot be
             # turned into an item (and would raise TypeError below)
             if version is None or href is None:
                 continue
             # skip partial versions
             if '_p' in version:
                 continue
             item = FirmwareLoader(item=FirmwareImage(),
                                   response=response)
             item.add_value("version", version)
             item.add_value("url", 'https://www.foscam.com' + href)
             item.add_value("product", product)
             item.add_value("vendor", self.name)
             yield item.load_item()
Exemplo n.º 26
0
    def parse(self, response):
        """Recursively crawl a download directory index and emit firmware items.

        The software family is taken from the page URL. Directory links are
        followed with the version/build carried in the meta; file links other
        than .txt/.md5/.torrent become firmware items.
        """
        # tested in order: 'DSMUC' has to be checked before its prefix 'DSM'
        families = ("DSMUC", "DSM", "VSM", "VSF", "SRM")
        ignored_exts = (".txt", ".md5", ".torrent")

        for row in response.xpath("//table/tr[position() > 3]"):
            if not row.xpath("./td[2]/a"):
                continue

            text = row.xpath("./td[2]/a//text()").extract()[0]
            href = row.xpath("./td[2]/a/@href").extract()[0]
            date = row.xpath("./td[3]//text()").extract()[0]

            software = next(
                (name for name in families if name in response.url), None)
            if software is None:
                continue  # should not happen :-)

            if href.endswith('/'):
                # a directory named like a version sets the version; any
                # other directory name (minus its trailing char) is the build
                found = FirmwareLoader.find_version_period([text])
                version = response.meta.get("version", found)
                build = None if found else text[0: -1]

                yield Request(
                    url=urllib.parse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"build": build, "version": version},
                    callback=self.parse)
                continue

            if any(href.lower().endswith(ext) for ext in ignored_exts):
                continue

            parts = os.path.splitext(text)[0].split("_")
            if software in parts:
                if response.meta["build"] in parts:
                    parts.remove(response.meta["build"])
                parts.remove(software)
                product = " ".join(parts)
            else:
                # usually "synology_x86_ds13_1504
                product = parts[-2]

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
            loader.add_value("build", response.meta["build"])
            loader.add_value("version", response.meta["version"])
            if software == "DSM":
                loader.add_value("mib", "https://global.download.synology.com/download/Document/Software/"
                                        "DeveloperGuide/Firmware/DSM/All/enu/Synology_MIB_File.zip")
            loader.add_value("url", href)
            loader.add_value("date", date)
            loader.add_value("product", product)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 27
0
    def parse_download(self, response):
        """Turn the firmware entries of a download page into items.

        Only download tables whose text mentions "firmware" are used. The
        download URL is cut out of the link's ``onclick`` handler and
        extended with the query parameters that confirm the download.
        """
        confirm_suffix = "&button=Continue+with+Download&Continue=yes"

        for block in response.xpath("//div[@class='downloadtable']"):
            block_text = block.xpath(".//text()").extract()
            if "firmware" not in " ".join(block_text).lower():
                continue

            title = block.xpath(
                ".//li[@class='maindescription' and position() = 1]//text()"
            ).extract()
            released = block.xpath(
                ".//li[@class='maindescription' and position() = 2]//text()"
            ).extract()
            onclick = block.xpath(
                ".//li[@class='maindescription']//a/@onclick"
            ).extract()[0]
            # the URL is the first single-quoted argument of the handler
            href = onclick.split('\'')[1] + confirm_suffix

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%m/%d/%Y"])
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("date", loader.find_date(released))
            loader.add_value("version", FirmwareLoader.find_version(title))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 28
0
 def parse(self, response):
     """Emit one firmware item per URL listed in ``self.firmware``.

     The product is the part of the file name before its first underscore;
     the response itself is not inspected.
     """
     for firmware_url in self.firmware:
         loader = FirmwareLoader(item=FirmwareImage())
         loader.add_value("url", firmware_url)
         file_name = firmware_url.split("/")[-1]
         loader.add_value("product", file_name.split("_")[0])
         loader.add_value("vendor", self.name)
         yield loader.load_item()
Exemplo n.º 29
0
    def parse_product(self, response):
        """Build one firmware item from the first entry of the Firmware tab.

        :param response: product page; ``response.meta`` must carry "product"
            and "version".
        :returns: generator yielding a single loaded item, or nothing when
            the page has no Firmware tab.
        """
        #<a href="#Firmware"><span>Firmware</span></a>
        if not response.xpath("//a[@href=\"#Firmware\"]").extract():
            # no firmware tab: stop here. The previous `yield None` emitted a
            # bogus value and then fell through to the lookups below.
            return

        description = response.xpath(
            "//div[@class=\"product-name\"]//strong/text()").extract()[0]
        url = response.xpath(
            "//*[@id=\"content_Firmware\"]/table/tbody/tr[1]/th/a/@href"
        ).extract()[0]
        # kept as a list of text nodes, the form every other find_date()
        # caller in this file passes
        date = response.xpath(
            "//*[@id=\"content_Firmware\"]/table/tbody/tr[2]/td[1]/span[2]/text()"
        ).extract()

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%d/%m/%y"])

        item.add_value("url", url)
        item.add_value("date", item.find_date(date))
        item.add_value("description", description)
        item.add_value("product", response.meta["product"])
        item.add_value("version", response.meta["version"])
        item.add_value("vendor", self.vendor)
        yield item.load_item()
Exemplo n.º 30
0
    def parse_product(self, response):
        """Build a firmware item from the first download table of the page.

        The table's title cell is read as "<product>_<build>..." or
        "<product> ..."; the date and link come from fixed rows of the table.

        :param response: product download page.
        :returns: generator yielding a single loaded firmware item.
        """
        title = response.xpath(
            "//div[@class='download']/table[1]//tr[1]/td[2]//text()").extract(
            )[0]
        date = response.xpath(
            "//div[@class='download']/table[1]//tr[4]/td[2]//text()").extract(
            )
        href = response.xpath(
            "//div[@class='download']/table[1]//tr[5]/td[2]/a/@href").extract(
            )[0]

        # drop non-ASCII characters but keep a text value: with bytes the
        # substring tests below raise TypeError on Python 3
        text = title.encode("ascii", "ignore").decode("ascii")

        build = None
        product = None
        if "_" in text:
            pieces = text.split("_")
            product = pieces[0]
            build = pieces[1]
        elif " " in text:
            product = text.split(" ")[0]

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%Y/%m/%d"])
        # url and description are handed over as text rather than as
        # UTF-8 encoded bytes
        item.add_value("url", href)
        item.add_value("date", item.find_date(date))
        item.add_value("description", title)
        item.add_value("build", build)
        item.add_value("product", product)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
Exemplo n.º 31
0
    def parse_product(self, response):
        """Yield one firmware item per download link of a product page.

        The link title is split on spaces: the last word is the version and
        the first word names the product, with any trailing hardware-revision
        marker ("V1.0", "(V2.0)") removed.

        :param response: product download page.
        :returns: generator of loaded firmware items.
        """
        import re

        #FH456V1.0 Firmware V10.1.1.1_EN
        #E101(V2.0) Firmware V1.10.0.1_EN
        #G3(V2.0) Firmware V2.0.0.1_EN
        #O3 Firmware V1.0.0.3_EN
        #i6 Firmware V1.0.0.9(3857)_EN
        pattern = re.compile(
            r'^(?P<product>([a-uw-zA-UW-Z0-9])+)[\(\uff08]?(V\d\.0)?')

        for a in response.xpath("//div[@id='mainbox']//dd/a"):
            url = a.xpath("./@href").extract()[0]
            title = a.xpath("./text()").extract()[0]

            items = title.split(' ')
            product = items[0]
            version = items[-1]

            token = items[0]
            if isinstance(token, bytes):
                # only byte strings need decoding; text has no usable
                # .decode() on Python 3
                try:
                    token = token.decode('utf-8')
                except UnicodeDecodeError:
                    token = None

            # on any failure keep the raw first word as the product (the old
            # fallback referenced the undefined name `item`)
            ret = pattern.search(token) if token is not None else None
            if ret:
                product = ret.group('product')

            item = FirmwareLoader(
                item=FirmwareImage(), response=response)
            item.add_value(
                "version", version)
            item.add_value("url", url)
            item.add_value("product", product)
            item.add_value("vendor", self.vendor)
            yield item.load_item()
Exemplo n.º 32
0
    def parse_product(self, response):
        """Yield one firmware item per download link of a product page.

        The link title is split on spaces: the last word is the version and
        the first word names the product, with any trailing hardware-revision
        marker ("V1.0", "(V2.0)") removed.

        :param response: product download page.
        :returns: generator of loaded firmware items.
        """
        import re

        #FH456V1.0 Firmware V10.1.1.1_EN
        #E101(V2.0) Firmware V1.10.0.1_EN
        #G3(V2.0) Firmware V2.0.0.1_EN
        #O3 Firmware V1.0.0.3_EN
        #i6 Firmware V1.0.0.9(3857)_EN
        # same pattern as before, spelled as a u'' literal with explicit
        # escapes because the ur'' prefix is a syntax error on Python 3
        pattern = re.compile(
            u'^(?P<product>([a-uw-zA-UW-Z0-9])+)[\\(\uff08]?(V\\d\\.0)?')

        for a in response.xpath("//div[@id='mainbox']//dd/a"):
            url = a.xpath("./@href").extract()[0]
            title = a.xpath("./text()").extract()[0]

            items = title.split(' ')
            product = items[0]
            version = items[-1]

            token = items[0]
            if isinstance(token, bytes):
                # only byte strings need decoding; text has no usable
                # .decode() on Python 3
                try:
                    token = token.decode('utf-8')
                except UnicodeDecodeError:
                    token = None

            # on any failure keep the raw first word as the product (the old
            # fallback referenced the undefined name `item`)
            ret = pattern.search(token) if token is not None else None
            if ret:
                product = ret.group('product')

            item = FirmwareLoader(
                item=FirmwareImage(), response=response)
            item.add_value(
                "version", version)
            item.add_value("url", url)
            item.add_value("product", product)
            item.add_value("vendor", self.vendor)
            yield item.load_item()
Exemplo n.º 33
0
    def parse_product(self, response):
        """Build a firmware item from the first download table of the page.

        The table's title cell is read as "<product>_<build>..." or
        "<product> ..."; the date and link come from fixed rows of the table.

        :param response: product download page.
        :returns: generator yielding a single loaded firmware item.
        """
        title = response.xpath("//div[@class='download']/table[1]//tr[1]/td[2]//text()").extract()[0]
        date = response.xpath("//div[@class='download']/table[1]//tr[4]/td[2]//text()").extract()
        href = response.xpath("//div[@class='download']/table[1]//tr[5]/td[2]/a/@href").extract()[0]

        # drop non-ASCII characters but keep a text value: with bytes the
        # substring tests below raise TypeError on Python 3
        text = title.encode("ascii", "ignore").decode("ascii")

        build = None
        product = None
        if "_" in text:
            pieces = text.split("_")
            product = pieces[0]
            build = pieces[1]
        elif " " in text:
            product = text.split(" ")[0]

        item = FirmwareLoader(item=FirmwareImage(), response=response, date_fmt=["%Y/%m/%d"])
        # url and description are handed over as text rather than as
        # UTF-8 encoded bytes
        item.add_value("url", href)
        item.add_value("date", item.find_date(date))
        item.add_value("description", title)
        item.add_value("build", build)
        item.add_value("product", product)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
Exemplo n.º 34
0
    def parse_product(self, response):
        """Extract firmware entries from a ``var commonInfo`` JavaScript payload.

        The response body is scanned with one regex per field and the matches
        are paired up positionally.

        :param response: response whose ``text`` is the JavaScript source.
        :returns: generator of loaded firmware items; empty when the body does
            not start with ``var commonInfo``.
        """
        import re

        js = response.text
        if not js.startswith("var commonInfo"):
            return

        p_product = u"id:\"(?P<product>.*?)\""
        p_description = u"title:\"(?P<description>.*?)\""
        p_version = u"romVersions:\"(?P<version>.*?)\""
        p_url = u"romUrl:\"(?P<url>.*?)\""
        p_date = u"updateDate:\"(?P<date>.*?)\""

        # products must come from the id pattern; the version pattern was
        # previously used here, which filled "product" with version strings
        products = re.findall(p_product, js)
        descriptions = re.findall(p_description, js)
        versions = re.findall(p_version, js)
        urls = re.findall(p_url, js)
        dates = re.findall(p_date, js)

        # NOTE(review): assumes every entry carries all five fields so the
        # lists line up by position -- confirm against a live payload. zip()
        # stops at the shortest list instead of raising IndexError.
        for product, description, version, url, date in zip(
                products, descriptions, versions, urls, dates):
            item = FirmwareLoader(item=FirmwareImage(), response=response)
            item.add_value("url", url)
            item.add_value("product", product)
            item.add_value("version", version)
            item.add_value("description", description)
            item.add_value("date", date)
            item.add_value("vendor", self.name)
            yield item.load_item()
Exemplo n.º 35
0
    def parse_product_sw_fw(self, response):
        """Yield firmware items from the 'Firmware' section of a downloads page.

        Sections of the downloads table are indexed by their column-name
        text; the rows of the one titled 'Firmware' are passed to
        ``self.extract_fw_info`` and turned into items.

        :param response: downloads page; ``response.meta['product']`` names
            the product.
        :returns: generator of loaded firmware items; empty when the page has
            no 'Firmware' section.
        """
        product = response.meta['product']

        #inspect_response(response, self)
        col_selector_map = {}
        # Find the "Firmware" section.  NOTE: whitespace in the class is intentional
        for section in response.css('div.docs-table__section '):
            for col in section.css('div.docs-table__column-name'):
                # extract_first() is None for a column header without text
                col_text = (col.xpath('.//text()').extract_first() or '').strip()
                if len(col_text) > 1:
                    col_selector_map[col_text] = section

        fw_sect = col_selector_map.get(u'Firmware')
        if fw_sect is None:
            logging.debug("Did not find a 'Firmware' section in the downloads for %s", product)
            return

        # Iterate Firmware rows
        for fw_row in fw_sect.css('div.docs-table__row'):
            fw_version, fw_href, fw_date, fw_desc = self.extract_fw_info(fw_row, response)
            if fw_href is None:
                continue

            item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%m/%d/%y"])
            item.add_value('product', product)
            item.add_value('vendor', self.name)
            item.add_value('url', fw_href)
            # the version was extracted but never stored on the item before
            item.add_value('version', fw_version)
            item.add_value('description', fw_desc)
            item.add_value('date', fw_date)
            yield item.load_item()
Exemplo n.º 36
0
    def parse(self, response):
        """Walk the product index, following product pages and emitting files.

        Each ``div.item`` group has a category heading and a list of product
        links. Links ending in "/" lead to a product page and are followed
        with ``self.parse_product``; any other link is treated as a direct
        firmware download.

        :param response: product index page.
        :returns: generator of follow-up requests and loaded firmware items.
        """
        for product_group in response.css("div.item"):
            # prefer the heading's <span> text, fall back to the heading itself
            category = product_group.css("h2 span::text").get()
            if category is None:
                category = product_group.css("h2::text").get()
            category = (category or "").strip()

            for product in product_group.css("a"):
                link = product.css("::attr(href)").get()
                if not link:
                    # anchor without a target: nothing to follow or download
                    continue
                # anchors without text (e.g. image links) get an empty model
                model = (product.css("::text").get() or "").strip()

                if link.endswith("/"):
                    yield response.follow(link,
                                          meta={
                                              "category": category,
                                              "product": model
                                          },
                                          callback=self.parse_product)
                else:
                    item = FirmwareLoader(item=FirmwareImage(),
                                          date_fmt=["%y-%m-%d"])
                    item.add_value("vendor", self.vendor)
                    item.add_value("url", link)
                    item.add_value("product", model)
                    item.add_value("category", category)
                    yield item.load_item()
Exemplo n.º 37
0
    def parse_json(self, response):
        """Turn a JSON list of downloads into firmware items.

        Each entry's ``showName`` is split into product, version, date and
        build when it matches one of the known naming schemes; the download
        URL is derived from the entry's ``fileName``.

        :param response: response whose text is a JSON array of objects with
            "showName" and "fileName" keys.
        :returns: generator of loaded firmware items, one per entry.
        """
        resp = json.loads(response.text)
        self.logger.debug(resp)
        for product in resp:
            name = product['showName'].strip()
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y%m%d"])

            self.logger.debug("Parsing '%s'" % name)
            parsed = self._split_show_name(name)
            if parsed:
                model, version, date, build = parsed
                item.add_value("product", model)
                item.add_value("version", version)
                item.add_value("date", date)
                item.add_value("build", build)
            else:
                self.logger.debug("No match for %s" % name)

            url = ('http://service.tp-link.com.cn/download/' +
                   quote(product['fileName']))
            # logged instead of printed to stdout
            self.logger.debug(url)
            item.add_value("url", url)
            item.add_value("description", name)
            item.add_value("vendor", self.vendor)
            yield item.load_item()

    def _split_show_name(self, name):
        """Split a display name into (product, version, date, build), or None.

        Six-digit dates (YYMMDD) are expanded to eight digits by prefixing
        "20" so that they fit the "%Y%m%d" date format.
        """
        # Model, Version, Date, Build
        match = re.search(r'^(.+) (V[\d\.]+)([^\d]+)(\d+)_([\d\.]+)$', name)
        if match:
            fields = (match.group(1), match.group(2), match.group(4),
                      match.group(5))
        else:
            # TL-NVR5104 V1.0_171205.标准版
            match = re.search(
                r'^(.+)[_ ]([vV][\d\.]+)([^\d]*)_([\d]+)([^\d]+)$', name)
            if match:
                fields = (match.group(1), match.group(2), match.group(4),
                          match.group(5))
            else:
                # TL-IPC545K(P) V3.0_180227(1.0.14)标准版
                # the parentheses around the build number are literal and
                # must be escaped; unescaped they were plain groups and the
                # pattern could not match names of this shape
                match = re.search(
                    r'^(.+)[_ ](V[\d\.]+)_(\d+)\(([\d\.]+)\)([^\d]+)$', name)
                if not match:
                    return None
                fields = (match.group(1), match.group(2), match.group(3),
                          match.group(4) + ' ' + match.group(5))

        self.logger.debug(match.groups())
        model, version, date, build = fields
        if len(date) == 6:
            date = "20" + date
        return model, version, date, build
Exemplo n.º 38
0
    def parse_product(self, response):
        """Build a single firmware item from a product page.

        The download URL is the first link inside the page's thumbnail block;
        product and version are taken from ``response.meta``.
        """
        thumbnail_links = response.xpath(
            "//div[@class='thumbnail']//a/@href").extract()
        download_url = thumbnail_links[0]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        for field, value in (
                ("version", response.meta['version']),
                ("url", download_url),
                ("product", response.meta['product']),
                ("vendor", self.vendor)):
            loader.add_value(field, value)
        yield loader.load_item()
Exemplo n.º 39
0
    def parse_product(self, response):
        """Build a single firmware item from a product download page.

        The download link of the page is appended to ``self.firmware_url``;
        date, description and product are taken from ``response.meta``.

        :param response: product page with "date", "description" and
            "product" in its meta.
        :returns: generator yielding one loaded firmware item.
        """
        url = self.firmware_url + response.xpath('//a[@id="downLoadHref"]/@href').extract()[0]

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        # the meta entry is a value, not an XPath expression, so it is added
        # with add_value() like the other meta fields (was add_xpath())
        item.add_value("date", response.meta['date'])
        item.add_value("description", response.meta['description'])
        item.add_value("url", url)
        item.add_value("product", response.meta["product"])
        item.add_value("vendor", self.name)
        yield item.load_item()
Exemplo n.º 40
0
    def parse(self, response):
        """Crawl product overview pages and scrape product download tables.

        With a "product" in the meta the response is a product page: every
        table row that looks like a firmware download becomes an item.
        Without it the response is an overview page: each listed product that
        links to an HTML page is requested with this same callback.

        :param response: overview or product page.
        :returns: generator of loaded firmware items or follow-up requests.
        """
        if "product" in response.meta:
            # remove unnecessary files
            to_remove_list = [
                "end user license agreement", "eula", "release notes",
                "mac os", "windows", "guide", "(pdf)", "sample", "client",
                "manager", "software", "virtual", "control_panel",
                "activexbypass"
            ]
            skipped_suffixes = ("htm", "html", "pdf", "ova", ".plcm.vc")

            for entry in response.xpath("//div[@class='tab-content']//tr")[1:]:

                version = entry.xpath("./td[1]//a//text()").extract_first()
                url = entry.xpath("./td[2]//a/@href").extract_first()
                if version is None or url is None:
                    continue

                if any(x in url.lower() for x in to_remove_list) \
                        or any(x in version.lower() for x in to_remove_list) \
                        or url.endswith(skipped_suffixes):
                    continue

                # a stray trailing comma used to turn this into a 1-tuple
                url = urllib.parse.urljoin(response.url,
                                           PolycomSpider.fix_url(url))

                item = FirmwareLoader(item=FirmwareImage(), response=response)
                item.add_value("version", version)
                item.add_value("url", url)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()

        # all entries on the product overview pages
        elif response.xpath("//div[@class='product-listing']"):
            for entry in response.xpath("//div[@class='product-listing']//li"):
                if not entry.xpath("./a"):
                    continue

                text = entry.xpath("./a//text()").extract_first()
                href = entry.xpath("./a/@href").extract_first()
                # date = entry.xpath("./span//text()").extract()

                # extract_first() is None for anchors without text or href
                if not text or not href:
                    continue
                href = href.strip()

                if any(x in text.lower() for x in ["advisories", "support", "notices", "features"]) \
                        or href.endswith(".pdf"):
                    continue

                path = urllib.parse.urlparse(href).path
                if path.endswith((".htm", ".html")) or "(html)" in text.lower():
                    yield Request(url=urllib.parse.urljoin(
                        response.url, PolycomSpider.fix_url(href)),
                                  meta={"product": text},
                                  headers={"Referer": response.url},
                                  callback=self.parse)
Exemplo n.º 41
0
    def parse(self, response):
        """Yield one firmware item per link in the second table column.

        The link's ``title`` attribute is stored as the description and is
        also passed through ``self.parse_product`` to obtain the product.

        Args:
            response: the listing page.

        Yields:
            Loaded firmware items.
        """
        anchors = response.xpath("//table//tr//td[2]//a")
        for anchor in anchors:
            # Both attributes are indexed directly, so an anchor lacking
            # either one raises IndexError.
            description = anchor.xpath('./@title').extract()[0]
            link = anchor.xpath('./@href').extract()[0]

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", link)
            loader.add_value("product", self.parse_product(description))
            loader.add_value("description", description)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 42
0
 def parse_product(self, response):
     """Yield the firmware item for a single product page.

     The download link is the first ``href`` found inside a ``thumbnail``
     div; version and product are taken from ``response.meta``.

     Args:
         response: the product page, carrying ``version`` and ``product``
             in its meta.

     Yields:
         One loaded firmware item.
     """
     # Indexing raises IndexError when the page has no thumbnail link.
     download_url = response.xpath(
         "//div[@class='thumbnail']//a/@href").extract()[0]

     loader = FirmwareLoader(item=FirmwareImage(), response=response)
     loader.add_value("version", response.meta['version'])
     loader.add_value("url", download_url)
     loader.add_value("product", response.meta['product'])
     loader.add_value("vendor", self.vendor)
     yield loader.load_item()
Exemplo n.º 43
0
 def download_item(self, response):
     """Yield the firmware item for a download page.

     The link in the ``downbtns`` div is prefixed with ``https:`` to build
     the download URL. Version, date and product come from
     ``response.meta``.

     Args:
         response: the download page, carrying ``version``, ``date`` and
             ``product`` in its meta.

     Yields:
         One loaded firmware item, or nothing when the page has no download
         link.
     """
     href = response.xpath(
         "//div[@class='downbtns']/a/@href").extract_first()
     if href is None:
         # Without this guard the URL would become the literal string
         # "https:None".
         return

     item = FirmwareLoader(item=FirmwareImage(),
                           response=response,
                           date_fmt=["%Y-%m-%d"])
     item.add_value("url", "https:" + href)
     item.add_value("version", response.meta["version"])
     item.add_value("date", response.meta["date"])
     item.add_value("product", response.meta["product"])
     item.add_value("vendor", self.name)
     yield item.load_item()
Exemplo n.º 44
0
    def parse_product(self, response):
        """Yield the firmware item for a single product page.

        The download URL is ``self.firmware_url`` followed by the ``href``
        of the ``downLoadHref`` anchor. Description and product are copied
        from ``response.meta``.

        Args:
            response: the product page, carrying ``date``, ``description``
                and ``product`` in its meta.

        Yields:
            One loaded firmware item.
        """
        base = self.firmware_url
        # Indexing raises IndexError when the anchor is missing.
        hrefs = response.xpath('//a[@id="downLoadHref"]/@href').extract()
        download_url = base + hrefs[0]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        # NOTE(review): the meta value is used as an XPath expression here,
        # not as a literal date -- confirm that the caller really stores an
        # XPath under "date".
        loader.add_xpath("date", response.meta['date'])
        loader.add_value("description", response.meta['description'])
        loader.add_value("url", download_url)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
Exemplo n.º 45
0
    def parse(self, response):
        """Turn every link in the second table column into a firmware item.

        The ``title`` attribute of each link is used as the description, and
        ``self.parse_product`` derives the product value from it.

        Args:
            response: the listing page.

        Yields:
            Loaded firmware items.
        """
        for cell_link in response.xpath("//table//tr//td[2]//a"):
            # A link without a title or href raises IndexError here.
            label = cell_link.xpath('./@title').extract()[0]
            target = cell_link.xpath('./@href').extract()[0]

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", target)
            loader.add_value("product", self.parse_product(label))
            loader.add_value("description", label)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 46
0
    def parse(self, response):
        """Yield a firmware item for every ``.img`` link on the page.

        The version is the text after the last ``-`` in the file name, with
        the ``.img`` extension cut off. The spider name is used as both
        product and vendor.

        Args:
            response: the page listing the images.

        Yields:
            Loaded firmware items.
        """
        for href in response.xpath("//a/@href").extract():
            if not href.endswith(".img"):
                continue

            # Last dash-separated piece of the file name, e.g. "<ver>.img".
            tail = href.split("/")[-1].split("-")[-1]

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", href)
            loader.add_value("product", self.name)
            loader.add_value("vendor", self.name)
            loader.add_value("version", tail[0: tail.rfind(".img")])
            yield loader.load_item()
Exemplo n.º 47
0
 def parse(self, response):
     """Crawl a link listing and yield items for ``.bin``/``.upg`` files.

     Links equal to ``..`` or ``/`` are ignored. Any other link containing
     a ``/`` is requested and parsed with this same callback.

     Args:
         response: the listing page.

     Yields:
         Loaded firmware items, or ``Request`` objects for linked pages.
     """
     for href in response.xpath("//a/@href").extract():
         # Skip the parent-directory and root links.
         if href in ("..", "/"):
             continue

         if href.endswith((".bin", ".upg")):
             loader = FirmwareLoader(item=FirmwareImage(), response=response)
             loader.add_value("url", href)
             loader.add_value("vendor", self.name)
             yield loader.load_item()
         elif "/" in href:
             next_url = urllib.parse.urljoin(response.url, href)
             yield Request(url=next_url,
                           headers={"Referer": response.url},
                           callback=self.parse)
Exemplo n.º 48
0
    def parse(self, response):
        """Yield firmware items from the two paragraph groups on the page.

        The page is read as fixed-size runs of ``<p>`` elements inside
        ``main_right``: runs of seven in the first ``span`` and runs of five
        in the second. Within a run, paragraph 2 lists the products,
        paragraph 3 holds the version, and the last paragraph holds the
        download link. One item is yielded per listed product.

        Args:
            response: the download page.

        Yields:
            Loaded firmware items.
        """
        # First span: runs of 7 paragraphs, products separated by CRLF.
        first_count = len(response.xpath("//div[@id='main_right']/span[1]/p"))
        for offset in range(0, first_count, 7):
            names = response.xpath(
                "//div[@id='main_right']/span[1]//p[%d]/text()" % (offset + 2)
            ).extract()[0].split("\r\n")

            for product in names:
                loader = FirmwareLoader(item=FirmwareImage(), response=response)
                loader.add_xpath(
                    "version",
                    "//div[@id='main_right']/span[1]//p[%d]/text()" % (offset + 3))
                loader.add_xpath(
                    "url",
                    "//div[@id='main_right']/span[1]//p[%d]/a/@href" % (offset + 7))
                loader.add_value("product", product)
                loader.add_value("vendor", self.name)
                yield loader.load_item()

        # Second span: runs of 5 paragraphs, products separated by commas.
        second_count = len(response.xpath("//div[@id='main_right']/span[2]/p"))
        for offset in range(0, second_count, 5):
            names = response.xpath(
                "//div[@id='main_right']/span[2]//p[%d]/text()" % (offset + 2)
            ).extract()[0].split(",")

            for product in names:
                loader = FirmwareLoader(item=FirmwareImage(), response=response)
                loader.add_xpath(
                    "version",
                    "//div[@id='main_right']/span[2]//p[%d]/text()" % (offset + 3))
                loader.add_xpath(
                    "url",
                    "//div[@id='main_right']/span[2]//p[%d]/a/@href" % (offset + 5))
                loader.add_value("product", product)
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 49
0
    def parse(self, response):
        """Yield an item for each link whose target contains ``Firmware``.

        Every paragraph and list inside the ``main-container`` div is
        scanned. The product is derived from all text of the enclosing
        element through ``FirmwareLoader.find_product``.

        Args:
            response: the page to scan.

        Yields:
            Loaded firmware items.
        """
        blocks = response.xpath(
            "//div[@class='main-container']//p|//div[@class='main-container']//ul")
        for block in blocks:
            block_text = block.xpath(".//text()").extract()

            for href in block.xpath(".//a/@href").extract():
                if "Firmware" not in href:
                    continue

                loader = FirmwareLoader(item=FirmwareImage(), response=response)
                loader.add_value("url", href)
                loader.add_value(
                    "product", FirmwareLoader.find_product(block_text))
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 50
0
    def parse_download(self, response):
        """Yield firmware items for the download links inside ``#auto``.

        A link qualifies when its target contains ``downloads`` or
        ``firmware`` and does not end in ``.html``. The link text becomes
        the description.

        Args:
            response: the download page, carrying ``version`` and
                ``product`` in its meta.

        Yields:
            Loaded firmware items.
        """
        for anchor in response.xpath("//div[@id='auto']//a"):
            # Both lookups index directly and raise IndexError when the
            # anchor has no href or no text.
            href = anchor.xpath("./@href").extract()[0]
            label = anchor.xpath(".//text()").extract()[0]

            if href.endswith(".html"):
                continue
            if "downloads" not in href and "firmware" not in href:
                continue

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("version", response.meta["version"])
            loader.add_value("url", href)
            loader.add_value("description", label)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 51
0
 def parse_link(self, response):
     """Yield a firmware item for every row of the download table.

     Each row of the ``etdownloaditems`` table body supplies the version
     (``column-version`` cell), the URL and the description (link in the
     header cell). The product comes from ``response.meta``.

     Args:
         response: the download page, carrying ``product`` in its meta.

     Yields:
         Loaded firmware items.
     """
     # Some items require captcha authentication and pass a cookie
     # (DownloadAuthorizationToken, a long hexadecimal value).
     for row in response.xpath("//tbody[@class='etdownloaditems']//tr"):
         version = row.xpath(
             ".//td[@class='column-version']//text()").extract()[0].strip()

         loader = FirmwareLoader(item=FirmwareImage(), response=response)
         loader.add_value("version", version)
         loader.add_value("url", row.xpath(".//th/a/@href").extract()[0])
         loader.add_value(
             "description", row.xpath(".//th/a//text()").extract()[0])
         loader.add_value("product", response.meta["product"])
         loader.add_value("vendor", self.name)
         yield loader.load_item()
Exemplo n.º 52
0
 def parse(self, response):
     """Crawl a link listing and yield items for ``.bin``/``.upg`` files.

     Links equal to ``..`` or ``/`` are skipped. Every other link that
     contains a ``/`` is requested with this same callback.

     Args:
         response: the listing page.

     Yields:
         Loaded firmware items, or ``Request`` objects for linked pages.
     """
     for href in response.xpath("//a/@href").extract():
         if href in ("..", "/"):
             continue

         is_image = href.endswith((".bin", ".upg"))
         if is_image:
             loader = FirmwareLoader(item=FirmwareImage(), response=response)
             loader.add_value("url", href)
             loader.add_value("vendor", self.name)
             yield loader.load_item()
         elif "/" in href:
             target = urlparse.urljoin(response.url, href)
             yield Request(
                 url=target,
                 headers={"Referer": response.url},
                 callback=self.parse)
Exemplo n.º 53
0
    def parse(self, response):
        """Follow router selections, then collect firmware download links.

        * If the page has a ``router`` drop-down, a request is issued for
          each option value other than the empty one and ``allrouters``.
        * Otherwise links are taken from the ``search_main_content`` cell
          when present, else from the ``ghfbodycontent`` div. A link
          becomes an item when its target contains both
          ``download.verizon.net`` and ``firmware``.

        Args:
            response: the page being parsed.

        Yields:
            ``Request`` objects for router pages, or loaded firmware items.
        """
        if response.xpath("//select[@id='router']"):
            options = response.xpath(
                "//select[@id='router']/option/@value").extract()
            for product in options:
                if not product or product == "allrouters":
                    continue
                yield Request(
                    url=urlparse.urljoin(
                        response.url, "?router=%s" % (product)),
                    headers={"Referer": response.url},
                    callback=self.parse)
            return

        # Both remaining page layouts are scanned the same way; only the
        # container holding the links differs.
        if response.xpath("//td[@id='search_main_content']"):
            anchors = response.xpath("//td[@id='search_main_content']//a")
        else:
            anchors = response.xpath("//div[@id='ghfbodycontent']//a")

        for anchor in anchors:
            hrefs = anchor.xpath("./@href").extract()
            if not hrefs:
                continue

            href = hrefs[0]
            text = anchor.xpath(".//text()").extract()

            if "download.verizon.net" in href and "firmware" in href:
                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response)
                loader.add_value("url", href)
                # Raises IndexError when the anchor has no text node.
                loader.add_value("description", text[0])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 54
0
 def parse_product(self, response):
     """Yield firmware items from the rows of the ``accordion-2`` table.

     Rows after the first are inspected. A row is kept when the text of
     the first link in its second cell contains ``firmware``
     (case-insensitive). The loader is created with the row as its
     selector, and the date and URL are added with row-relative XPaths.

     Args:
         response: the product page, carrying ``product`` in its meta.

     Yields:
         Loaded firmware items.
     """
     rows = response.xpath("//div[@id='accordion-2']//tr[position() > 1]")
     for row in rows:
         link_text = row.xpath("./td[2]//a[1]/text()").extract()
         if "firmware" not in "".join(link_text).lower():
             continue

         loader = FirmwareLoader(item=FirmwareImage(), response=response,
                                 selector=row, date_fmt=["%Y-%m-%d"])
         loader.add_xpath("date", "td[1]//text()")
         loader.add_value("description", link_text)
         loader.add_xpath("url", "td[2]//a[1]/@href")
         loader.add_value("product", response.meta["product"])
         loader.add_value("vendor", self.name)
         loader.add_value(
             "version", FirmwareLoader.find_version_period(link_text))
         yield loader.load_item()
Exemplo n.º 55
0
    def parse_download(self, response):
        """Yield firmware items from a JSON list of downloadable files.

        The response body is decoded as JSON and iterated. Entries whose
        ``subFileType`` is ``firmware`` become items. ``releaseDate`` is
        divided by 1000 before being converted to a timestamp and formatted
        with the loader's first ``date_fmt`` entry.

        Args:
            response: the JSON response, carrying ``product`` in its meta.

        Yields:
            Loaded firmware items.
        """
        # NOTE(review): body_as_unicode() may be deprecated in newer Scrapy
        # releases in favour of response.text -- confirm against the pinned
        # version.
        entries = json.loads(response.body_as_unicode())

        for entry in entries:
            if entry["subFileType"] != "firmware":
                continue

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%d/%m/%y"])
            loader.add_value("version", entry["fileVersion"])

            released = datetime.datetime.fromtimestamp(
                int(entry["releaseDate"]) / 1000)
            date_fmt = loader.context.get("date_fmt")[0]
            loader.add_value("date", released.strftime(date_fmt))

            loader.add_value("description", entry["fileName"])
            loader.add_value("url", entry["downloadUrl"])
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.vendor)
            yield loader.load_item()
Exemplo n.º 56
0
    def parse_product(self, response):
        """Yield a firmware item for every ``.npk``/``.lzb`` link on the page.

        The product is the file name up to its last ``-``. When the name
        contains no dash, the name without its extension is used instead.
        Date and version are searched for in the text of the whole page.

        Args:
            response: the product page.

        Yields:
            Loaded firmware items.
        """
        # The page text is the same for every link, so it is extracted at
        # most once, and only when a matching link exists.
        page_text = None

        for href in response.xpath("//a/@href").extract():
            if not href.endswith((".npk", ".lzb")):
                continue

            if page_text is None:
                page_text = response.xpath("//text()").extract()

            basename = href.split("/")[-1]
            dash = basename.rfind("-")
            if dash != -1:
                product = basename[:dash]
            else:
                # rfind() returned -1; slicing with it would silently chop
                # off the last character of the file name.
                product = basename.rsplit(".", 1)[0]

            item = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%Y-%b-%d"])
            item.add_value("date", item.find_date(page_text))
            item.add_value("url", href)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            item.add_value(
                "version", FirmwareLoader.find_version_period(page_text))
            yield item.load_item()
Exemplo n.º 57
0
    def parse_product(self, response):
        """Yield the firmware item for a single product page.

        The page text is searched for the first path of the form
        ``/cn/Uploads/files/....bin``, which is appended to the site root to
        build the download URL. All other fields come from
        ``response.meta``.

        Args:
            response: the product page, carrying ``product``, ``date``,
                ``version`` and ``description`` in its meta.

        Yields:
            One loaded firmware item, or nothing when the page contains no
            matching firmware path.
        """
        import re

        # Example match: /cn/Uploads/files/20161024/K1_V22.4.2.15.bin
        match = re.search(r"(/cn/Uploads/files/.*?\.bin)", response.text)
        if match is None:
            # No firmware link on this page; nothing to emit.
            return

        url = "http://www.phicomm.com/{}".format(match.group(1))

        item = FirmwareLoader(
            item=FirmwareImage())
        item.add_value("url", url)
        item.add_value("product", response.meta['product'])
        item.add_value("date", response.meta['date'])
        item.add_value("version", response.meta['version'])
        item.add_value("vendor", self.vendor)
        item.add_value("description", response.meta['description'])

        yield item.load_item()
Exemplo n.º 58
0
    def parse(self, response):
        """Walk a directory listing and yield an item per ``.gz`` image.

        Links containing ``..`` are ignored and links ending in ``/`` are
        followed with this same callback. For a ``.gz`` link whose target
        does not contain ``.iso``, the link text is stripped of its
        extensions and split on ``-``. The version found by
        ``FirmwareLoader.find_version_period`` is extended with any
        ``BETA``/``RC`` parts, and the remaining parts form the product.

        Args:
            response: the directory listing page.

        Yields:
            ``Request`` objects for sub-directories, or loaded firmware
            items.
        """
        for link in response.xpath("//a"):
            text = link.xpath(".//text()").extract_first()
            href = link.xpath(".//@href").extract_first()
            if href is None:
                # An anchor without a target can be neither followed nor
                # downloaded.
                continue

            if ".." in href:
                continue
            elif href.endswith('/'):
                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    callback=self.parse)
            elif href.endswith(".gz") and ".iso" not in href:
                if text is None:
                    # The file name is parsed from the link text.
                    continue

                # strip off multiple file extensions
                basename = os.path.splitext(text)[0]
                while ".img" in basename or ".iso" in basename:
                    stem = os.path.splitext(basename)[0]
                    if stem == basename:
                        # Nothing left to strip (e.g. a bare ".img"); stop
                        # instead of looping forever.
                        break
                    basename = stem

                parts = basename.split("-")
                version = FirmwareLoader.find_version_period(parts)

                # attempt to parse filename and generate product/version
                # strings
                remove = [version] if version else []
                suffixes = []
                for i, part in enumerate(parts):
                    if "BETA" in part:
                        # The part after BETA (if any) belongs to the tag.
                        has_next = i + 1 < len(parts)
                        following = parts[i + 1] if has_next else ""
                        suffixes.append("%s%s" % (part, following))
                        remove.append(part)
                        if has_next:
                            remove.append(following)
                    elif "RC" in part:
                        suffixes.append(part)
                        remove.append(part)
                    elif "RELEASE" in part:
                        remove.append(part)

                if suffixes:
                    # Works even when no base version was found.
                    version = "-".join(
                        ([version] if version else []) + suffixes)

                product_parts = [x for x in parts if x not in remove]

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
                item.add_value("version", version)
                item.add_value("url", href)
                item.add_value("date", item.find_date(
                    link.xpath("following::text()").extract()))
                item.add_value("product", "-".join(product_parts))
                item.add_value("vendor", self.name)
                yield item.load_item()