示例#1
0
    def parse_kb(self, response):
        """Yield a firmware item for each download link in a KB article.

        The markup of the first ``sfdc_richtext`` div is split on the literal
        "=-" and every piece is parsed as its own HTML document, so a link is
        only matched against the text of its own piece.  The intent is to
        separate regions divided by rules such as "======" or "------".

        Links whose href contains "cache-www" become items.  The version is
        searched in the text following the link and, failing that, in the
        whole piece; the date is searched in the whole piece.  The product
        is read from ``response.meta["product"]``.
        """
        articles = response.xpath("//div[@class='sfdc_richtext']").extract()
        if not articles:
            # No article body on this page: nothing to scan.
            return

        for entry in articles[0].split("=-"):
            resp = HtmlResponse(url=response.url, body=entry.strip(),
                                encoding=response.encoding)

            for link in resp.xpath("//a"):
                hrefs = link.xpath("@href").extract()
                # Anchors without an href (e.g. <a name="...">) give an
                # empty list; skip them rather than fail on hrefs[0].
                if not hrefs or "cache-www" not in hrefs[0]:
                    continue

                href = hrefs[0]
                text = resp.xpath("//text()").extract()
                text_next = link.xpath("following::text()").extract()

                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%b %d, %Y", "%B %d, %Y",
                                                "%m/%d/%Y"])

                # Prefer a version that appears after the link itself.
                version = FirmwareLoader.find_version_period(text_next)
                if not version:
                    version = FirmwareLoader.find_version_period(text)

                item.add_value("version", version)
                item.add_value("date", item.find_date(text))
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
示例#2
0
    def parse_kb(self, response):
        """Extract firmware items from the download links of a KB article.

        The first ``sfdc_richtext`` div is cut at every literal "=-" (the
        aim being to isolate regions separated by "======" or "------"
        rules) and each piece is re-parsed as a standalone HTML document.

        Within a piece, every link whose href contains "cache-www" yields
        one item.  Its version comes from the text after the link, or else
        from the whole piece; its date comes from the whole piece; its
        product comes from ``response.meta["product"]``.
        """
        rich_text = response.xpath(
            "//div[@class='sfdc_richtext']").extract()
        if not rich_text:
            # The page has no article body, so there are no links to read.
            return

        for piece in rich_text[0].split("=-"):
            resp = HtmlResponse(url=response.url,
                                body=piece.strip(),
                                encoding=response.encoding)

            for link in resp.xpath("//a"):
                targets = link.xpath("@href").extract()
                if not targets:
                    # An anchor with no href cannot be a download link;
                    # indexing the empty result would raise IndexError.
                    continue

                href = targets[0]
                if "cache-www" not in href:
                    continue

                text = resp.xpath("//text()").extract()
                text_next = link.xpath("following::text()").extract()

                item = FirmwareLoader(
                    item=FirmwareImage(),
                    response=response,
                    date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

                # Text after the link is tried first, then the whole piece.
                version = (FirmwareLoader.find_version_period(text_next)
                           or FirmwareLoader.find_version_period(text))

                item.add_value("version", version)
                item.add_value("date", item.find_date(text))
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
示例#3
0
    def parse(self, response):
        """Walk a directory-listing table, recursing into sub-directories.

        Table rows past the third are inspected.  A link ending in "/" is
        followed with this same callback, passing "build" and "version" on
        in the request meta; any other link, unless it ends in .txt, .md5 or
        .torrent, is emitted as a firmware item.  Pages whose URL names none
        of the known software families produce nothing.
        """
        # The software family depends only on the page URL, so resolve it
        # once.  "DSMUC" must be tested before "DSM", which it contains.
        software = next(
            (name for name in ("DSMUC", "DSM", "VSM", "VSF", "SRM")
             if name in response.url), None)
        if software is None:
            return  # should not happen :-)

        # A listing reached without crawl meta has neither key; read both
        # with .get() so a file found there does not raise KeyError.
        build = response.meta.get("build")
        version = response.meta.get("version")

        for entry in response.xpath("//table/tr[position() > 3]"):
            if not entry.xpath("./td[2]/a"):
                continue

            text = entry.xpath("./td[2]/a//text()").extract()[0]
            href = entry.xpath("./td[2]/a/@href").extract()[0]
            date = entry.xpath("./td[3]//text()").extract()[0]

            if href.endswith('/'):
                found = FirmwareLoader.find_version_period([text])
                yield Request(
                    url=urllib.parse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={
                        # A directory name holding no version is used as
                        # the build, minus its last character (presumably
                        # the trailing slash of the listing).
                        "build": None if found else text[0:-1],
                        # A version already present in meta takes
                        # precedence over the directory name.
                        "version": response.meta.get("version", found),
                    },
                    callback=self.parse)
            elif not href.lower().endswith((".txt", ".md5", ".torrent")):
                basename = os.path.splitext(text)[0].split("_")

                if software in basename:
                    if build in basename:
                        basename.remove(build)
                    basename.remove(software)
                    product = " ".join(basename)
                elif len(basename) > 1:
                    # usually "synology_x86_ds13_1504"
                    product = basename[-2]
                else:
                    # No underscore in the name, hence no second-to-last
                    # part: fall back to the whole name.
                    product = basename[0]

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
                item.add_value("build", build)
                item.add_value("version", version)
                if software == "DSM":
                    item.add_value("mib", "https://global.download.synology.com/download/Document/Software/"
                                          "DeveloperGuide/Firmware/DSM/All/enu/Synology_MIB_File.zip")
                item.add_value("url", href)
                item.add_value("date", date)
                item.add_value("product", product)
                item.add_value("vendor", self.name)
                yield item.load_item()
示例#4
0
    def parse(self, response):
        """Handle both the license-agreement pages and the download listings.

        A page holding the ``UCagreement`` form is scanned for quoted strings
        containing "downloads.polycom.com" inside the ``productAndDoc`` div;
        each becomes an item described by the request meta.  Otherwise a
        page holding a ``ContentChannel`` div is treated as a listing:
        documentation-like entries are ignored, HTML pages are followed with
        this same callback, and any other entry with a non-empty URL path
        becomes an item.
        """
        if response.xpath("//form[@name='UCagreement']"):
            markup = response.xpath(
                "//div[@id='productAndDoc']").extract()[0]
            # Splitting the raw markup on double quotes isolates the
            # quoted attribute values, download URLs among them.
            for href in markup.split('"'):
                if "downloads.polycom.com" not in href:
                    continue

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%B %d, %Y"])
                loader.add_value("version", response.meta["version"])
                loader.add_value("url", href.encode("utf-8"))
                loader.add_value("date", response.meta["date"])
                loader.add_value("description",
                                 response.meta["description"])
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
            return

        if not response.xpath("//div[@id='ContentChannel']"):
            return

        # Link captions containing any of these mark documents or desktop
        # software rather than firmware.
        ignored = ("end user license agreement", "eula", "release notes",
                   "mac os", "windows", "guide", "(pdf)", "sample")

        for entry in response.xpath("//div[@id='ContentChannel']//li"):
            if not entry.xpath("./a"):
                continue

            text = entry.xpath("./a//text()").extract()[0]
            href = entry.xpath("./a/@href").extract()[0].strip()
            date = entry.xpath("./span//text()").extract()

            path = urlparse.urlparse(href).path
            caption = text.lower()

            if href.endswith(".pdf") or any(word in caption
                                            for word in ignored):
                continue

            if path.endswith((".htm", ".html")) or "(html)" in caption:
                # An intermediate page: crawl it, remembering what the
                # link said about the download.
                target = urlparse.urljoin(response.url,
                                          PolycomSpider.fix_url(href))
                details = {
                    "product": response.meta.get("product", text),
                    "date": date,
                    "version": FirmwareLoader.find_version_period([text]),
                    "description": text,
                }
                yield Request(url=target,
                              meta=details,
                              headers={"Referer": response.url},
                              callback=self.parse)
            elif path:
                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%B %d, %Y"])
                loader.add_value(
                    "version", FirmwareLoader.find_version_period([text]))
                loader.add_value("url", href.encode("utf-8"))
                loader.add_value("date", loader.find_date(date))
                loader.add_value("description", text)
                # No product is recorded for direct links.
                loader.add_value("vendor", self.name)
                yield loader.load_item()
示例#5
0
    def parse_download(self, response):
        """Yield one firmware item per record in download category 7.

        The product name is the first text node of the page header with the
        " Support & Drivers" suffix removed.  Download links are rewritten
        from "file-download" to "file-redirect".
        """
        record_xpath = ("//li[@class='categoryBucket categoryBucketId-7']"
                        "//li[@class='record ']")
        stamp_xpath = ".//ul[@class='dateVersion']//strong/text()"

        for record in response.xpath(record_xpath):
            header = response.xpath(
                "//div[@class='prodNavHeaderBody']//text()").extract()
            product = header[0].replace(" Support & Drivers", "")

            # Date and version are both searched in the same text nodes.
            date = record.xpath(stamp_xpath).extract()
            version = record.xpath(stamp_xpath).extract()

            link = record.xpath(".//a/@href").extract()[0]
            href = link.replace("file-download", "file-redirect")
            text = record.xpath(".//a//text()").extract()[0]

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%b %d, %Y"])
            loader.add_value("url", href)
            loader.add_value("product", product)
            loader.add_value("date", loader.find_date(date))
            loader.add_value("description", text)
            loader.add_value("version", loader.find_version_period(version))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
示例#6
0
    def parse_kb(self, response):
        """Yield firmware items from the download section of a KB article.

        Firmware versions are not clearly separated in the markup, so each
        paragraph is cut at every "<br><br>" and each segment is parsed as
        its own HTML document.  Paragraphs and segments are walked in
        reverse so that a MIB link is seen before the firmware links it
        belongs with; the most recent MIB link is attached to every
        firmware item that follows.
        """
        mib = None

        paragraphs = response.xpath(
            "//div[@id='support-article-downloads']/div/p")
        for entry in reversed(paragraphs):
            for segment in reversed(entry.extract().split("<br><br>")):
                resp = HtmlResponse(url=response.url,
                                    body=segment,
                                    encoding=response.encoding)
                for href in resp.xpath("//a/@href").extract():
                    if "MIBs" in href:
                        mib = href

                    elif "firmware" in href:
                        text = resp.xpath("//text()").extract()

                        item = FirmwareLoader(item=FirmwareImage(),
                                              response=resp,
                                              date_fmt=["%m/%d/%Y"])
                        item.add_value("date", item.find_date(text))
                        # Record the link that matched.  Collecting every
                        # href of the segment would also pick up MIB links
                        # and sibling firmware links.
                        item.add_value("url", href)
                        item.add_value("mib", mib)
                        item.add_value("product", response.meta["product"])
                        item.add_value("vendor", self.name)
                        item.add_value(
                            "version",
                            FirmwareLoader.find_version_period(text))
                        yield item.load_item()
示例#7
0
    def parse_kb(self, response):
        """Extract firmware items from a support article's download list.

        The markup does not clearly separate firmware versions, so every
        paragraph is split on "<br><br>" and each segment re-parsed on its
        own.  Both paragraphs and segments are visited in reverse order to
        reach a MIB link before the firmware links; the last MIB link seen
        is stored on each later firmware item.
        """
        mib = None

        for entry in reversed(response.xpath(
                "//div[@id='support-article-downloads']/div/p")):
            for segment in reversed(entry.extract().split("<br><br>")):
                resp = HtmlResponse(
                    url=response.url, body=segment, encoding=response.encoding)
                segment_text = None

                for href in resp.xpath("//a/@href").extract():
                    if "MIBs" in href:
                        mib = href
                        continue
                    if "firmware" not in href:
                        continue

                    # The segment text is only needed for firmware links,
                    # and is the same for every link of the segment.
                    if segment_text is None:
                        segment_text = resp.xpath("//text()").extract()

                    item = FirmwareLoader(
                        item=FirmwareImage(), response=resp, date_fmt=["%m/%d/%Y"])
                    item.add_value("date", item.find_date(segment_text))
                    # Use the matched link itself, not every anchor in the
                    # segment, which may include MIB or other links.
                    item.add_value("url", href)
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    item.add_value(
                        "version",
                        FirmwareLoader.find_version_period(segment_text))
                    yield item.load_item()
示例#8
0
文件: asus.py 项目: MikimotoH/scraper
    def parse_product(self, response):
        """Yield a firmware item for every firmware entry of a product page.

        Download sections are keyed by type: firmware = 20, gpl source = 30,
        bios = 3.  Each firmware entry is paired, when possible, with a GPL
        source entry whose heading contains the same version string.
        """
        firmware_xpath = ("//div[@id='div_type_20']"
                          "/div[@id='download-os-answer-table']")
        source_xpath = ("//div[@id='div_type_30']"
                        "/div[@id='download-os-answer-table']")
        link_xpath = "./table//tr[3]//a/@href"

        for entry in response.xpath(firmware_xpath):
            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%Y/%m/%d"])

            heading = entry.xpath("./p//text()").extract()
            version = FirmwareLoader.find_version_period(heading)

            # grab first download link (e.g. DLM instead of global or p2p)
            href = entry.xpath(link_xpath).extract()[0]

            # attempt to find matching source code entry; if several
            # entries match, the last one is kept
            gpl = None
            if version:
                for source in response.xpath(source_xpath):
                    source_heading = "".join(
                        source.xpath("./p//text()").extract())
                    if version in source_heading:
                        gpl = source.xpath(link_xpath).extract()[0]

            date_text = entry.xpath(
                "./table//tr[2]/td[1]//text()").extract()
            description_text = entry.xpath(
                "./table//tr[1]//td[1]//text()").extract()

            loader.add_value("version", version)
            loader.add_value("date", loader.find_date(date_text))
            loader.add_value("description", " ".join(description_text))
            loader.add_value("url", href)
            loader.add_value("sdk", gpl)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
示例#9
0
    def parse(self, response):
        """Crawl a directory-listing table and yield firmware items.

        Links ending in "/" are followed with this same callback.  The first
        directory entered supplies the product name and the next one the
        build (its name with "build" removed); both travel in the request
        meta.  Files ending in .bin, .elf, .fdt, .imx, .chk or .trx become
        firmware items.  Links containing ".." are ignored.
        """
        # A listing reached without crawl meta has neither key.  Reading
        # them with .get() lets a file found there still be emitted
        # instead of raising KeyError.
        parent_build = response.meta.get("build")
        parent_product = response.meta.get("product")

        for link in response.xpath("//table//tr"):
            if not link.xpath("./td[2]/a"):
                continue

            text = link.xpath("./td[2]/a/text()").extract()[0]
            href = link.xpath("./td[2]//@href").extract()[0]

            if ".." in href:
                continue
            elif href.endswith("/"):
                build, product = parent_build, parent_product

                if not product:
                    product = text
                elif not build:
                    build = text.replace("build", "")

                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"build": build, "product": product},
                    callback=self.parse,
                )
            elif href.endswith((".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")):
                item = FirmwareLoader(item=FirmwareImage(), response=response, date_fmt=["%Y-%m-%d"])
                item.add_value("build", parent_build)
                item.add_value("url", href)
                item.add_value("version", FirmwareLoader.find_version_period(os.path.splitext(text)[0].split("-")))
                item.add_value("date", item.find_date(link.xpath("./td[3]/text()").extract()))
                item.add_value("product", parent_product)
                item.add_value("vendor", self.name)
                yield item.load_item()
示例#10
0
    def parse_product(self, response):
        """Build firmware items from the firmware section of a product page.

        Section types: firmware = 20, gpl source = 30, bios = 3.  For every
        firmware entry the GPL source section is searched for an entry whose
        heading contains the firmware version; its download link, if any, is
        stored as the item's "sdk".
        """
        table_id = "download-os-answer-table"
        firmware_entries = response.xpath(
            "//div[@id='div_type_20']/div[@id='%s']" % table_id)

        for entry in firmware_entries:
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response, date_fmt=["%Y/%m/%d"])

            version = FirmwareLoader.find_version_period(
                entry.xpath("./p//text()").extract())

            # grab first download link (e.g. DLM instead of global or p2p)
            href = entry.xpath("./table//tr[3]//a/@href").extract()[0]

            # attempt to find matching source code entry
            gpl = None
            sources = response.xpath(
                "//div[@id='div_type_30']/div[@id='%s']" % table_id
            ) if version else []
            for source in sources:
                heading = "".join(source.xpath("./p//text()").extract())
                if version not in heading:
                    continue
                # No break: a later matching entry replaces an earlier one.
                gpl = source.xpath("./table//tr[3]//a/@href").extract()[0]

            released = item.find_date(
                entry.xpath("./table//tr[2]/td[1]//text()").extract())
            description = " ".join(
                entry.xpath("./table//tr[1]//td[1]//text()").extract())

            for field, value in (("version", version),
                                 ("date", released),
                                 ("description", description),
                                 ("url", href),
                                 ("sdk", gpl),
                                 ("product", response.meta["product"]),
                                 ("vendor", self.name)):
                item.add_value(field, value)
            yield item.load_item()
示例#11
0
    def parse(self, response):
        """Follow every link on the page, tagging each request with a version.

        The version is searched in the link's own text nodes and passed to
        ``parse_url`` through the request meta.  Anchors without an href are
        skipped, as is a text-less link to "/".
        """
        for link in response.xpath("//a"):
            texts = link.xpath("text()").extract()
            hrefs = link.xpath("@href").extract()

            if not hrefs:
                # Nothing to follow; indexing the empty list would raise.
                continue
            href = hrefs[0]

            if not texts and href == "/":
                # e.g. <a href="/"><em>(root)</em></a>
                continue

            # NOTE(review): find_version_period receives a list of strings
            # at the other call sites of this code base, so the (possibly
            # empty) list of text nodes is passed rather than a bare
            # string -- confirm against FirmwareLoader.
            version = FirmwareLoader.find_version_period(texts[:1])

            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                meta={"version": version},
                callback=self.parse_url)
示例#12
0
    def parse(self, response):
        """Request each linked page, carrying the version found in the link.

        Every anchor with an href is followed with ``parse_url`` as the
        callback; the version searched in the anchor's direct text is put
        in the request meta.  A link to "/" that has no direct text is not
        followed.
        """
        for link in response.xpath("//a"):
            hrefs = link.xpath("@href").extract()
            if not hrefs:
                # An anchor with no href has no target; the former
                # extract()[0] raised IndexError here.
                continue

            href = hrefs[0]
            # Keep at most the first direct text node, as a list.
            text = link.xpath("text()").extract()[:1]

            if href == "/" and not text:
                # e.g. <a href="/"><em>(root)</em></a>
                continue

            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                # NOTE(review): a list is passed because the other call
                # sites of find_version_period pass lists of strings --
                # confirm against FirmwareLoader.
                meta={"version": FirmwareLoader.find_version_period(text)},
                callback=self.parse_url)
示例#13
0
    def parse(self, response):
        """Follow each link on the page, recording a version from its text.

        Every anchor with an href is requested with ``parse_url`` as the
        callback and the version found in its first direct text node stored
        in the request meta.  Anchors without an href, and the text-less
        link to "/", are skipped.
        """
        for link in response.xpath("//a"):
            text = link.xpath("text()").extract_first()
            href = link.xpath("@href").extract_first()

            if href is None:
                # Joining an empty target onto the page URL gives the page
                # URL back, so the request would only re-fetch this page.
                continue

            if text is None and href == "/":
                # <a href="/"><em>(root)</em></a>
                continue

            # NOTE(review): find_version_period is handed a list of strings
            # at the other call sites of this code base, so the text is
            # wrapped here (empty list when the anchor has no direct
            # text) -- confirm against FirmwareLoader.
            candidates = [text] if text is not None else []

            yield Request(
                url=urllib.parse.urljoin(response.url, href),
                headers={"Referer": response.url},
                meta={"version": FirmwareLoader.find_version_period(candidates)},
                callback=self.parse_url)
示例#14
0
 def parse_product(self, response):
     """Yield an item for each firmware row of the second accordion table.

     Rows after the first one are examined; a row counts as firmware when
     the text of the first link in its second cell contains "firmware"
     (case-insensitive).  Date and URL are read relative to the row.
     """
     rows = response.xpath("//div[@id='accordion-2']//tr[position() > 1]")
     for row in rows:
         caption = row.xpath("./td[2]//a[1]/text()").extract()
         if "firmware" not in "".join(caption).lower():
             continue

         # The row is the loader's selector, so the relative XPaths
         # passed to add_xpath below are evaluated against it.
         loader = FirmwareLoader(item=FirmwareImage(),
                                 response=response,
                                 selector=row,
                                 date_fmt=["%Y-%m-%d"])
         loader.add_xpath("date", "td[1]//text()")
         loader.add_value("description", caption)
         loader.add_xpath("url", "td[2]//a[1]/@href")
         loader.add_value("product", response.meta["product"])
         loader.add_value("vendor", self.name)
         loader.add_value("version",
                          FirmwareLoader.find_version_period(caption))
         yield loader.load_item()
示例#15
0
    def parse_product(self, response):
        """Yield a firmware item for every .npk or .lzb link on the page.

        The product is the file name up to its last "-"; the date and the
        version are searched in the text of the whole page.
        """
        page_text = None

        for href in response.xpath("//a/@href").extract():
            if not href.endswith((".npk", ".lzb")):
                continue

            # The page text is the same for every link: extract it once,
            # and only if the page has a firmware link at all.
            if page_text is None:
                page_text = response.xpath("//text()").extract()

            basename = href.split("/")[-1]
            # Keep everything before the last "-".  A name without any "-"
            # is kept whole; slicing up to rfind()'s -1 would silently
            # drop its last character.
            product = basename.rsplit("-", 1)[0]

            item = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%Y-%b-%d"])
            item.add_value("date", item.find_date(page_text))
            item.add_value("url", href)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            item.add_value(
                "version", FirmwareLoader.find_version_period(page_text))
            yield item.load_item()
示例#16
0
 def parse_product(self, response):
     """Emit firmware items from the table inside the ``accordion-2`` div.

     The first table row is skipped.  A remaining row is used only if the
     text of the first link in its second cell mentions "firmware" in any
     letter case; that text also serves as description and version source.
     """
     for image in response.xpath(
             "//div[@id='accordion-2']//tr[position() > 1]"):
         link_text = image.xpath("./td[2]//a[1]/text()").extract()
         is_firmware = "firmware" in "".join(link_text).lower()
         if not is_firmware:
             continue

         fields = FirmwareLoader(
             item=FirmwareImage(), response=response,
             selector=image, date_fmt=["%Y-%m-%d"])
         # XPaths given to add_xpath are relative to the row selector.
         fields.add_xpath("date", "td[1]//text()")
         fields.add_value("description", link_text)
         fields.add_xpath("url", "td[2]//a[1]/@href")
         fields.add_value("product", response.meta["product"])
         fields.add_value("vendor", self.name)
         fields.add_value(
             "version", FirmwareLoader.find_version_period(link_text))
         yield fields.load_item()
示例#17
0
    def parse_product(self, response):
        """Emit one firmware item per link ending in .npk or .lzb.

        The product name is the part of the file name before its last "-".
        Date and version are both searched in the text of the whole page.
        """
        firmware_links = [
            href for href in response.xpath("//a/@href").extract()
            if href.endswith((".npk", ".lzb"))
        ]
        if not firmware_links:
            return

        # Every item uses the same page text, so extract it a single time
        # instead of once per link.
        text = response.xpath("//text()").extract()

        for href in firmware_links:
            basename = href.split("/")[-1]
            # rpartition reports whether a "-" exists.  Without one the
            # whole name is used, where basename[0:rfind("-")] would have
            # cut off the final character.
            stem, dash, _ = basename.rpartition("-")
            product = stem if dash else basename

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y-%b-%d"])
            item.add_value("date", item.find_date(text))
            item.add_value("url", href)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            item.add_value("version",
                           FirmwareLoader.find_version_period(text))
            yield item.load_item()
示例#18
0
    def parse(self, response):
        """Crawl a file listing and yield an item per gzip-compressed image.

        Links ending in "/" are followed with this same callback; links
        containing ".." are ignored.  A link ending in ".gz" whose href does
        not contain ".iso" becomes an item: its link text is stripped of
        extensions and split on "-", and the pieces are sorted into a
        version (with BETA/RC qualifiers appended) and a product name.
        """
        for link in response.xpath("//a"):
            texts = link.xpath(".//text()").extract()
            hrefs = link.xpath(".//@href").extract()
            if not texts or not hrefs:
                # Anchors lacking text or a target cannot be classified.
                continue
            text, href = texts[0], hrefs[0]

            if ".." in href:
                continue
            elif href.endswith('/'):
                yield Request(url=urlparse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              callback=self.parse)
            elif href.endswith(".gz") and ".iso" not in href:
                # strip off multiple file extensions
                basename = os.path.splitext(text)[0]
                while ".img" in basename or ".iso" in basename:
                    stripped = os.path.splitext(basename)[0]
                    if stripped == basename:
                        # splitext leaves names like ".img" untouched;
                        # stop instead of looping forever.
                        break
                    basename = stripped

                tokens = basename.split("-")
                version = FirmwareLoader.find_version_period(tokens)

                # attempt to parse filename and generate product/version
                # strings
                remove = [version] if version else []
                qualifiers = []
                for i, token in enumerate(tokens):
                    if "BETA" in token:
                        # The token after BETA is part of the qualifier,
                        # when there is one.
                        follower = tokens[i + 1] if i + 1 < len(tokens) else ""
                        qualifiers.append("%s%s" % (token, follower))
                        remove.append(token)
                        if follower:
                            remove.append(follower)
                    elif "RC" in token:
                        qualifiers.append(token)
                        remove.append(token)
                    elif "RELEASE" in token:
                        remove.append(token)

                if qualifiers:
                    # Works whether or not a base version was found;
                    # "None += str" used to raise TypeError here.
                    version = "-".join(
                        ([version] if version else []) + qualifiers)

                product_parts = [x for x in tokens if x not in remove]

                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%d-%b-%Y"])
                item.add_value("version", version)
                item.add_value("url", href)
                item.add_value(
                    "date",
                    item.find_date(link.xpath("following::text()").extract()))
                item.add_value("product", "-".join(product_parts))
                item.add_value("vendor", self.name)
                yield item.load_item()
示例#19
0
    def parse(self, response):
        """Walk a file listing, yielding an item for each ".gz" image.

        Directory links (ending in "/") are requested with this same
        callback and links containing ".." are skipped.  For a ".gz" link
        whose href has no ".iso", the link text is reduced to its base
        name and split on "-"; the version is searched among the pieces,
        BETA and RC pieces are appended to it, and the pieces left over
        form the product name.
        """
        for link in response.xpath("//a"):
            texts = link.xpath(".//text()").extract()
            hrefs = link.xpath(".//@href").extract()
            if not (texts and hrefs):
                # Indexing an empty result would raise IndexError and
                # abort the rest of the listing.
                continue

            text = texts[0]
            href = hrefs[0]

            if ".." in href:
                continue
            elif href.endswith('/'):
                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    callback=self.parse)
            elif href.endswith(".gz") and ".iso" not in href:
                # strip off multiple file extensions
                basename = os.path.splitext(text)[0]
                while ".img" in basename or ".iso" in basename:
                    shorter = os.path.splitext(basename)[0]
                    if shorter == basename:
                        # Nothing left to strip (e.g. the name is ".img"):
                        # bail out rather than spin.
                        break
                    basename = shorter

                parts = basename.split("-")
                version = FirmwareLoader.find_version_period(parts)

                # attempt to parse filename and generate product/version
                # strings
                remove = [version] if version else []
                suffixes = []
                for i, part in enumerate(parts):
                    if "BETA" in part:
                        # BETA may be the last piece, so the piece after
                        # it is optional.
                        nxt = parts[i + 1] if i + 1 < len(parts) else None
                        suffixes.append(part + (nxt or ""))
                        remove.append(part)
                        if nxt is not None:
                            remove.append(nxt)
                    elif "RC" in part:
                        suffixes.append(part)
                        remove.append(part)
                    elif "RELEASE" in part:
                        remove.append(part)

                if suffixes:
                    # Appending to a missing (None) version used to raise
                    # TypeError; build the string from what is available.
                    base = [version] if version else []
                    version = "-".join(base + suffixes)

                product = "-".join(x for x in parts if x not in remove)

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
                item.add_value("version", version)
                item.add_value("url", href)
                item.add_value("date", item.find_date(
                    link.xpath("following::text()").extract()))
                item.add_value("product", product)
                item.add_value("vendor", self.name)
                yield item.load_item()
示例#20
0
    def parse(self, response):
        """Recurse through a directory-listing table, yielding firmware items.

        A link ending in "/" is requested with this same callback; the
        request meta carries the product (the first directory name seen)
        and the build (the next directory name, with "build" removed).  A
        link ending in .bin, .elf, .fdt, .imx, .chk or .trx becomes a
        firmware item.  Links containing ".." are skipped.
        """
        # Both keys are absent on a listing reached without crawl meta.
        # .get() is used throughout so that files found on such a page do
        # not raise KeyError.
        inherited_build = response.meta.get("build")
        inherited_product = response.meta.get("product")

        for link in response.xpath("//table//tr"):
            if not link.xpath("./td[2]/a"):
                continue

            text = link.xpath("./td[2]/a/text()").extract()[0]
            href = link.xpath("./td[2]//@href").extract()[0]

            if ".." in href:
                continue

            if href.endswith('/'):
                build = inherited_build
                product = inherited_product

                if not product:
                    product = text
                elif not build:
                    build = text.replace("build", "")

                yield Request(url=urlparse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              meta={
                                  "build": build,
                                  "product": product
                              },
                              callback=self.parse)
            elif href.endswith(
                    (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")):
                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%Y-%m-%d"])
                item.add_value("build", inherited_build)
                item.add_value("url", href)
                item.add_value(
                    "version",
                    FirmwareLoader.find_version_period(
                        os.path.splitext(text)[0].split("-")))
                item.add_value(
                    "date",
                    item.find_date(link.xpath("./td[3]/text()").extract()))
                item.add_value("product", inherited_product)
                item.add_value("vendor", self.name)
                yield item.load_item()
示例#21
0
    def parse_product(self, response):
        """Yield firmware items from the tabs whose caption says "firmware".

        Tabs (``normaltab2`` list items) and content panels (``normalcon2``
        divs) are matched by position.  Only the first row of each table in
        a matching panel is read.
        """
        tab_total = len(response.xpath("//ul[@id='normaltab2']//a"))

        # XPath positions are 1-based.
        for position in range(1, tab_total + 1):
            caption = response.xpath(
                "//ul[@id='normaltab2']/li[%d]/a//text()" % position).extract()
            if "firmware" not in "".join(caption).lower():
                continue

            rows = response.xpath(
                "//div[@id='normalcon2']/div[%d]//table/tr[1]" % position)
            for row in rows:
                version = row.xpath("./td[2]//text()").extract()
                date = row.xpath("./td[4]//text()").extract()
                href = row.xpath("./td[5]//a/@href").extract()[0]

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%Y-%m-%d"])
                loader.add_value(
                    "version", FirmwareLoader.find_version_period(version))
                loader.add_value("url", href)
                loader.add_value("date", loader.find_date(date))
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.vendor)
                yield loader.load_item()
示例#22
0
    def parse_product(self, response):
        """Collect firmware downloads from the tabbed section of the page.

        The n-th list item of ``normaltab2`` labels the n-th div of
        ``normalcon2``.  For each label containing "firmware" (in any
        letter case), the first row of every table in the matching div
        yields one item.
        """
        tab_xpath = "//ul[@id='normaltab2']/li[%d]/a//text()"
        row_xpath = "//div[@id='normalcon2']/div[%d]//table/tr[1]"

        anchors = response.xpath("//ul[@id='normaltab2']//a")
        for index, _ in enumerate(anchors, 1):
            label = "".join(response.xpath(tab_xpath % index).extract())
            if "firmware" in label.lower():
                for entry in response.xpath(row_xpath % index):
                    version_cell = entry.xpath("./td[2]//text()").extract()
                    date_cell = entry.xpath("./td[4]//text()").extract()
                    href = entry.xpath("./td[5]//a/@href").extract()[0]

                    item = FirmwareLoader(
                        item=FirmwareImage(),
                        response=response,
                        date_fmt=["%Y-%m-%d"],
                    )
                    found = FirmwareLoader.find_version_period(version_cell)
                    item.add_value("version", found)
                    item.add_value("url", href)
                    item.add_value("date", item.find_date(date_cell))
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.vendor)
                    yield item.load_item()
示例#23
0
文件: qnap.py 项目: MikimotoH/scraper
    def parse_product(self, response):
        """Yield a firmware item for each table row that has a download link.

        Rows after the first one of the tables inside ``main_data_block``
        are read.  Version and build both come from the second cell, the
        date from the fourth, the link from the fifth; the description is
        the third text node of the first cell.
        """
        rows = response.xpath(
            "//div[@class='main_data_block']//table/tr[position() > 1]")

        for row in rows:
            text = row.xpath("./td[1]//text()").extract()
            edition = row.xpath("./td[2]//text()").extract()
            date = row.xpath("./td[4]//text()").extract()
            hrefs = row.xpath("./td[5]//a/@href").extract()

            if not hrefs:
                # Rows without a link offer nothing to download.
                continue

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%Y/%m/%d"])
            loader.add_value(
                "version", FirmwareLoader.find_version_period(edition))
            loader.add_value("build", FirmwareLoader.find_build(edition))
            # Only the first link of the cell is used.
            loader.add_value("url", hrefs[0])
            loader.add_value("date", loader.find_date(date))
            loader.add_value("description", text[2].strip())
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
示例#24
0
    def parse_download(self, response):
        """Turn each record of download category 7 into a firmware item.

        The product is the first header text node without its
        " Support & Drivers" suffix; "file-download" in a record's link is
        replaced by "file-redirect".
        """
        records = response.xpath(
            "//li[@class='categoryBucket categoryBucketId-7']//li[@class='record ']")

        for firmware in records:
            header_text = response.xpath(
                "//div[@class='prodNavHeaderBody']//text()").extract()
            product = header_text[0].replace(" Support & Drivers", "")

            # The same <strong> text nodes are searched for the date and
            # for the version.
            strong_xpath = ".//ul[@class='dateVersion']//strong/text()"
            date = firmware.xpath(strong_xpath).extract()
            version = firmware.xpath(strong_xpath).extract()

            first_link = firmware.xpath(".//a/@href").extract()[0]
            href = first_link.replace("file-download", "file-redirect")
            text = firmware.xpath(".//a//text()").extract()[0]

            item = FirmwareLoader(
                item=FirmwareImage(),
                response=response,
                date_fmt=["%b %d, %Y"],
            )
            item.add_value("url", href)
            item.add_value("product", product)
            item.add_value("date", item.find_date(date))
            item.add_value("description", text)
            item.add_value("version", item.find_version_period(version))
            item.add_value("vendor", self.name)
            yield item.load_item()
示例#25
0
    def parse_product(self, response):
        """Turn each downloadable table row into a firmware item.

        Only rows after the first one in the ``main_data_block`` table are
        considered, and only those whose fifth cell contains a link. Version
        and build are both derived from the second cell, the date from the
        fourth, and the description from the third text node of the first
        cell. ``response.meta["product"]`` provides the product name.
        """
        row_query = "//div[@class='main_data_block']//table/tr[position() > 1]"

        for tr in response.xpath(row_query):
            text = tr.xpath("./td[1]//text()").extract()
            edition = tr.xpath("./td[2]//text()").extract()
            date = tr.xpath("./td[4]//text()").extract()
            hrefs = tr.xpath("./td[5]//a/@href").extract()

            if not hrefs:
                # nothing to download on this row
                continue

            version = FirmwareLoader.find_version_period(edition)
            build = FirmwareLoader.find_build(edition)

            fw = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%Y/%m/%d"])
            fw.add_value("version", version)
            fw.add_value("build", build)
            fw.add_value("url", hrefs[0])
            fw.add_value("date", fw.find_date(date))
            fw.add_value("description", text[2].strip())
            fw.add_value("product", response.meta["product"])
            fw.add_value("vendor", self.name)
            yield fw.load_item()
示例#26
0
    def parse_json(self, response):
        """Yield firmware items described by a JSON listing.

        The ``item`` entries of the payload, and the ``file`` list of each
        entry, are walked in reverse order. A file counts as firmware when
        its ``filetypename`` is "firmware" (case-insensitive) or its
        ``isFirmF`` flag is "1". Any other file whose name contains "MIB" is
        remembered, and its URL is attached as ``mib`` to the firmware files
        visited after it. Build and product come from ``response.meta``.
        """
        payload = json.loads(response.body_as_unicode())
        mib_url = None

        for group in reversed(payload["item"]):
            for record in reversed(group["file"]):
                is_firmware = (record["filetypename"].lower() == "firmware"
                               or record["isFirmF"] == "1")

                if not is_firmware:
                    # keep the most recently seen MIB for later firmware
                    if "MIB" in record["name"]:
                        mib_url = record["url"]
                    continue

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%m/%d/%y"])
                loader.add_value(
                    "version",
                    FirmwareLoader.find_version_period([record["name"]]))
                loader.add_value("date", record["date"])
                loader.add_value("description", record["name"])
                loader.add_value("url", record["url"])
                loader.add_value("build", response.meta["revision"])
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                loader.add_value("mib", mib_url)
                yield loader.load_item()
示例#27
0
    def parse_product(self, response):
        """Yield a firmware item for each link in the upgrade sections.

        Every ``li`` under the ``tab_conbox`` list is inspected; only those
        whose ``h3`` heading contains the marker text are used. Each
        ``dd/a`` link inside such a section becomes one item whose
        description is the link's first text node.
        """
        for section in response.xpath("//ul[@id='tab_conbox']/li"):
            heading = "".join(section.xpath("./h3//text()").extract())
            # the marker literal means "upgrade software"
            if u"升级软件" not in heading:
                continue

            for anchor in section.xpath(".//dd/a"):
                pieces = anchor.xpath(".//text()").extract()
                href = anchor.xpath("./@href").extract()[0]
                desc = pieces[0]

                # A single text node is split into words and reversed,
                # because the hardware version can precede the firmware
                # version, e.g. "FH330升级软件(V1.0) V1.0.0.24_CN"
                if len(pieces) == 1:
                    pieces = pieces[0].split()[::-1]

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response)
                loader.add_value(
                    "version", FirmwareLoader.find_version_period(pieces))
                loader.add_value("url", href)
                loader.add_value("product", response.meta["product"])
                loader.add_value("description", desc)
                loader.add_value("vendor", self.vendor)
                yield loader.load_item()
示例#28
0
    def parse_product(self, response):
        """Emit firmware items for links found in matching tab sections.

        A section (an ``li`` of the ``tab_conbox`` list) matches when the
        joined text of its ``h3`` contains the marker string. For each
        ``dd/a`` link in a matching section the href becomes the item URL,
        the first text node the description, and the version is searched for
        in the link text.
        """
        sections = response.xpath("//ul[@id='tab_conbox']/li")

        for block in sections:
            title_parts = block.xpath("./h3//text()").extract()
            # marker literal translates to "upgrade software"
            wanted = u"升级软件" in "".join(title_parts)

            if wanted:
                for link in block.xpath(".//dd/a"):
                    words = link.xpath(".//text()").extract()
                    target = link.xpath("./@href").extract()[0]
                    summary = words[0]

                    # With only one text node, search its words back to
                    # front: the hardware version may come first, as in
                    # "FH330升级软件(V1.0) V1.0.0.24_CN"
                    if len(words) == 1:
                        words = list(reversed(words[0].split()))

                    version = FirmwareLoader.find_version_period(words)

                    fw = FirmwareLoader(item=FirmwareImage(),
                                        response=response)
                    fw.add_value("version", version)
                    fw.add_value("url", target)
                    fw.add_value("product", response.meta["product"])
                    fw.add_value("description", summary)
                    fw.add_value("vendor", self.vendor)
                    yield fw.load_item()
示例#29
0
    def parse_json(self, response):
        """Convert a JSON file listing into firmware items.

        Entries under ``item`` and the files under each entry's ``file`` key
        are processed last-to-first. Files flagged as firmware (by
        ``filetypename`` or by ``isFirmF == "1"``) are yielded as items; a
        non-firmware file with "MIB" in its name updates the MIB URL that is
        attached to firmware items produced afterwards. ``response.meta``
        supplies the ``revision`` (stored as build) and the ``product``.
        """
        latest_mib = None
        listing = json.loads(response.body_as_unicode())

        for entry in reversed(listing["item"]):
            for info in reversed(entry["file"]):
                # negated form of: filetypename is firmware OR isFirmF is "1"
                if (info["filetypename"].lower() != "firmware"
                        and info["isFirmF"] != "1"):
                    if "MIB" in info["name"]:
                        latest_mib = info["url"]
                    continue

                name = info["name"]
                fw = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%m/%d/%y"])
                fw.add_value("version",
                             FirmwareLoader.find_version_period([name]))
                fw.add_value("date", info["date"])
                fw.add_value("description", info["name"])
                fw.add_value("url", info["url"])
                fw.add_value("build", response.meta["revision"])
                fw.add_value("product", response.meta["product"])
                fw.add_value("vendor", self.name)
                fw.add_value("mib", latest_mib)
                yield fw.load_item()
示例#30
0
    def parse(self, response):
        """Walk a table-based directory listing, recursing into folders.

        Rows after the third one are examined; rows without a link in the
        second cell are ignored. Behaviour depends on whether the response
        URL contains "DSM" or, failing that, "VSFirmware":

        * A link ending in "/" is followed with this same callback, carrying
          ``build`` and ``version`` in the request meta. Under "DSM" the
          version is inherited from the current meta when present, otherwise
          searched for in the link text; when the text holds no version, the
          text minus its last character is used as the build. Under
          "VSFirmware" the text minus its last character is split on "-"
          into version and build.
        * Any other link not ending in .txt, .md5 or .torrent becomes a
          firmware item using the build and version from the meta. The
          product is derived from the underscore-separated file name.
        """
        ignored_suffixes = (".txt", ".md5", ".torrent")
        in_dsm = "DSM" in response.url
        in_vs = (not in_dsm) and "VSFirmware" in response.url

        for row in response.xpath("//table/tr[position() > 3]"):
            if not row.xpath("./td[2]/a"):
                continue

            text = row.xpath("./td[2]/a//text()").extract()[0]
            href = row.xpath("./td[2]/a/@href").extract()[0]
            date = row.xpath("./td[3]//text()").extract()[0]

            if not (in_dsm or in_vs):
                continue

            if href.endswith('/'):
                if in_dsm:
                    found = FirmwareLoader.find_version_period([text])
                    version = response.meta.get("version", found)
                    # a folder name without a version is taken as the build
                    build = None if found else text[0:-1]
                else:
                    version, build = text[0:-1].split("-")

                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"build": build, "version": version},
                    callback=self.parse)
                continue

            if href.lower().endswith(ignored_suffixes):
                continue

            basename = os.path.splitext(text)[0].split("_")

            if not in_dsm:
                product = basename[0]
            elif "DSM" in basename:
                # drop the build and the "DSM" tag; the rest names the product
                if response.meta["build"] in basename:
                    basename.remove(response.meta["build"])
                basename.remove("DSM")
                product = " ".join(basename)
            else:
                product = basename[-2]

            item = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%d-%b-%Y"])
            item.add_value("build", response.meta["build"])
            item.add_value("version", response.meta["version"])
            if in_dsm:
                item.add_value(
                    "mib",
                    "http://dedl.synology.com/download/Document/MIBGuide/Synology_MIB_File.zip")
            item.add_value("url", href)
            item.add_value("date", date)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            yield item.load_item()
示例#31
0
    def parse(self, response):
        """Drive the ASP.NET download search form and yield firmware items.

        The same callback handles every stage of the drill-down:

        * If the advanced-search ("Product Drilldown") link is present, its
          target is posted back to the form.
        * Otherwise, if the ``LargeFirmware`` div holds links, one item is
          yielded per link whose text mentions "firmware". A link whose text
          mentions "mib" is remembered and attached to the firmware links
          that come after it. Links with neither a ``data-durl`` nor an
          ``href`` attribute are skipped.
        * Otherwise the product, product-family and product-category
          selectors are tried in that order. The first one that has no
          empty option value is used, and each of its options not already in
          ``self.visited`` is posted back. Only the product selector passes
          the option text on as ``meta["product"]``.
        """
        search_link = "//a[@id='ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter_BasicSearchPanel_btnAdvancedSearch']"

        # choose the "Product Drilldown" button
        if response.xpath(search_link):
            href = NetgearSpider.strip_js(
                response.xpath(search_link + "/@href").extract()[0])

            yield FormRequest.from_response(response,
                                            formname="aspnetForm",
                                            formdata={"__EVENTTARGET": href},
                                            headers={"Referer": response.url},
                                            callback=self.parse)
            return

        firmware_links = response.xpath("//div[@id='LargeFirmware']//a")
        if firmware_links:
            mib = None

            for entry in firmware_links:
                text = entry.xpath(".//text()").extract()
                # the target is usually 'data-durl', sometimes 'href'
                href = (entry.xpath("./@data-durl").extract()
                        or entry.xpath("./@href").extract())

                # An anchor with no target cannot be downloaded; skip it
                # instead of failing on href[0] and losing the whole page.
                if not href:
                    continue

                label = " ".join(text).lower()
                if "firmware" in label:
                    item = FirmwareLoader(item=FirmwareImage(),
                                          response=response)
                    item.add_value(
                        "version", FirmwareLoader.find_version_period(text))
                    item.add_value("url", href[0])
                    item.add_value("description", text[0])
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    yield item.load_item()
                elif "mib" in label:
                    mib = urlparse.urljoin(response.url, href[0].strip())
            return

        # continue iterating through product/model/os selector
        panel = "ctl00$ctl00$ctl00$mainContent$localizedContent$bodyCenter$adsPanel$"
        for field in ("lbProduct", "lbProductFamily", "lbProductCategory"):
            name = panel + field
            option_query = "//select[@name='%s']/option" % name

            # A selector with an empty option value is passed over. One that
            # is absent from the page has no values at all and is therefore
            # selected here, exactly as in the original elif chain.
            if "" in response.xpath(option_query + "/@value").extract():
                continue

            for entry in response.xpath(option_query):
                rsrc = entry.xpath("./@value").extract()[0]
                text = entry.xpath(".//text()").extract()

                if not text or (response.url, rsrc) in self.visited:
                    continue
                self.visited.append((response.url, rsrc))

                extra = {}
                if field == "lbProduct":
                    extra["meta"] = {"product": text[0]}

                # NOTE(review): "__ASYNCPOST:" is spelled with a trailing
                # colon in the original; kept verbatim because the effect of
                # changing it on the server response is unknown - confirm.
                yield FormRequest.from_response(
                    response,
                    formname="aspnetForm",
                    formdata={
                        "__EVENTTARGET": name,
                        name: rsrc,
                        "__ASYNCPOST:": "true"
                    },
                    headers={"Referer": response.url},
                    callback=self.parse,
                    **extra)

            # only the first eligible selector is processed
            break
示例#32
0
    def parse(self, response):
        """Crawl a directory listing and yield folder requests or items.

        Each table row past the third that has a link in its second cell is
        handled according to the section named in the response URL ("DSM"
        takes precedence over "VSFirmware"; other URLs yield nothing):

        * Folder links (href ending in "/") are requested with this callback
          and a meta dict holding ``build`` and ``version``. In the "DSM"
          section the version comes from the current meta if set, else from
          the link text, and a text without a version is used (minus its last
          character) as the build. In the "VSFirmware" section the text,
          minus its last character, is split on "-" into version and build.
        * File links other than .txt, .md5 and .torrent produce an item with
          the meta's build and version. "DSM" items also carry a fixed MIB
          URL, and the product is worked out from the "_"-separated name.
        """
        if "DSM" in response.url:
            section = "DSM"
        elif "VSFirmware" in response.url:
            section = "VSFirmware"
        else:
            section = None

        non_firmware = (".txt", ".md5", ".torrent")

        for line in response.xpath("//table/tr[position() > 3]"):
            if not line.xpath("./td[2]/a"):
                continue

            text = line.xpath("./td[2]/a//text()").extract()[0]
            href = line.xpath("./td[2]/a/@href").extract()[0]
            date = line.xpath("./td[3]//text()").extract()[0]

            if section is None:
                continue

            is_folder = href.endswith('/')

            if is_folder and section == "DSM":
                parsed_version = FirmwareLoader.find_version_period([text])
                version = response.meta.get("version", parsed_version)
                build = None
                if not parsed_version:
                    build = text[0:-1]
            elif is_folder:
                version, build = text[0:-1].split("-")

            if is_folder:
                yield Request(url=urlparse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              meta={"build": build, "version": version},
                              callback=self.parse)
                continue

            if href.lower().endswith(non_firmware):
                continue

            parts = os.path.splitext(text)[0].split("_")

            if section == "VSFirmware":
                product = parts[0]
            elif "DSM" not in parts:
                product = parts[-2]
            else:
                # strip the build and the "DSM" marker from the name parts
                if response.meta["build"] in parts:
                    parts.remove(response.meta["build"])
                parts.remove("DSM")
                product = " ".join(parts)

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%d-%b-%Y"])
            loader.add_value("build", response.meta["build"])
            loader.add_value("version", response.meta["version"])
            if section == "DSM":
                loader.add_value(
                    "mib",
                    "http://dedl.synology.com/download/Document/MIBGuide/Synology_MIB_File.zip"
                )
            loader.add_value("url", href)
            loader.add_value("date", date)
            loader.add_value("product", product)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
示例#33
0
    def parse(self, response):
        """Handle both agreement pages and content listings.

        * On a page with the ``UCagreement`` form, the markup of the
          ``productAndDoc`` div is split on double quotes and every piece
          containing "downloads.polycom.com" is yielded as an item whose
          version, date, description and product come from ``response.meta``.
        * On a page with a ``ContentChannel`` div, each list entry with a
          link is classified: documents, licences and similar entries (or
          hrefs ending in ".pdf") are skipped; links to .htm/.html paths, or
          labelled "(html)", are followed with this callback; any other link
          with a non-empty path is yielded as an item. No product is set on
          such directly yielded items.
        """
        if response.xpath("//form[@name='UCagreement']"):
            markup = response.xpath("//div[@id='productAndDoc']").extract()[0]

            for piece in markup.split('"'):
                if "downloads.polycom.com" not in piece:
                    continue

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%B %d, %Y"])
                loader.add_value("version", response.meta["version"])
                loader.add_value("url", piece.encode("utf-8"))
                loader.add_value("date", response.meta["date"])
                loader.add_value("description", response.meta["description"])
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
            return

        if not response.xpath("//div[@id='ContentChannel']"):
            return

        unwanted = ("end user license agreement", "eula", "release notes",
                    "mac os", "windows", "guide", "(pdf)", "sample")

        for entry in response.xpath("//div[@id='ContentChannel']//li"):
            if not entry.xpath("./a"):
                continue

            text = entry.xpath("./a//text()").extract()[0]
            href = entry.xpath("./a/@href").extract()[0].strip()
            date = entry.xpath("./span//text()").extract()

            path = urlparse(href).path
            label = text.lower()

            if any(word in label for word in unwanted) \
                    or href.endswith(".pdf"):
                continue

            if path.endswith((".htm", ".html")) or "(html)" in label:
                # intermediate page: carry the details over to the next hop
                details = {
                    "product": response.meta.get("product", text),
                    "date": date,
                    "version": FirmwareLoader.find_version_period([text]),
                    "description": text,
                }
                yield Request(url=urljoin(response.url,
                                          PolycomSpider.fix_url(href)),
                              meta=details,
                              headers={"Referer": response.url},
                              callback=self.parse)
            elif path:
                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%B %d, %Y"])
                loader.add_value("version",
                                 FirmwareLoader.find_version_period([text]))
                loader.add_value("url", href.encode("utf-8"))
                loader.add_value("date", loader.find_date(date))
                loader.add_value("description", text)
                loader.add_value("vendor", self.name)
                yield loader.load_item()
示例#34
0
    def parse(self, response):
        """Step through the ASP.NET search form and yield firmware items.

        One callback serves every page of the drill-down:

        * When the advanced-search ("Product Drilldown") link exists, its
          target is posted back through the ``aspnetForm`` form.
        * Otherwise, when the ``LargeFirmware`` div contains links, each link
          whose text mentions "firmware" becomes an item, and a link whose
          text mentions "mib" sets the MIB URL attached to later firmware
          links. A link lacking both ``data-durl`` and ``href`` is skipped.
        * Otherwise the product, product-family and product-category
          selectors are checked in that order. The first without an empty
          option value is used: every option not yet recorded in
          ``self.visited`` is posted back, and only the product selector
          forwards the option text as ``meta["product"]``.
        """
        drilldown = "//a[@id='ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter_BasicSearchPanel_btnAdvancedSearch']"

        # choose the "Product Drilldown" button
        if response.xpath(drilldown):
            target = NetgearSpider.strip_js(
                response.xpath(drilldown + "/@href").extract()[0])

            yield FormRequest.from_response(response,
                                            formname="aspnetForm",
                                            formdata={"__EVENTTARGET": target},
                                            headers={"Referer": response.url},
                                            callback=self.parse)
            return

        downloads = response.xpath("//div[@id='LargeFirmware']//a")
        if downloads:
            mib = None

            for entry in downloads:
                href = entry.xpath("./@data-durl").extract()
                text = entry.xpath(".//text()").extract()

                # sometimes it is 'href' instead of 'data-durl'
                if not href:
                    href = entry.xpath("./@href").extract()

                # With neither attribute there is nothing to download; skip
                # the anchor rather than raise IndexError on href[0] and
                # abort the remaining links on the page.
                if not href:
                    continue

                joined = " ".join(text).lower()
                if "firmware" in joined:
                    item = FirmwareLoader(
                        item=FirmwareImage(), response=response)
                    item.add_value(
                        "version", FirmwareLoader.find_version_period(text))
                    item.add_value("url", href[0])
                    item.add_value("description", text[0])
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    yield item.load_item()
                elif "mib" in joined:
                    mib = urlparse.urljoin(response.url, href[0].strip())
            return

        # continue iterating through product/model/os selector
        prefix = "ctl00$ctl00$ctl00$mainContent$localizedContent$bodyCenter$adsPanel$"
        for suffix in ("lbProduct", "lbProductFamily", "lbProductCategory"):
            select_name = prefix + suffix
            options = "//select[@name='%s']/option" % select_name

            # Skip a selector that has an empty option value. A selector
            # missing from the page yields no values and is therefore chosen
            # here, matching the original elif chain.
            if "" in response.xpath(options + "/@value").extract():
                continue

            for entry in response.xpath(options):
                rsrc = entry.xpath("./@value").extract()[0]
                text = entry.xpath(".//text()").extract()

                if not text or (response.url, rsrc) in self.visited:
                    continue
                self.visited.append((response.url, rsrc))

                optional = {}
                if suffix == "lbProduct":
                    optional["meta"] = {"product": text[0]}

                # NOTE(review): the "__ASYNCPOST:" key carries a trailing
                # colon in the original and is reproduced as-is; verify the
                # server-side effect before correcting it.
                yield FormRequest.from_response(
                    response,
                    formname="aspnetForm",
                    formdata={"__EVENTTARGET": select_name,
                              select_name: rsrc,
                              "__ASYNCPOST:": "true"},
                    headers={"Referer": response.url},
                    callback=self.parse,
                    **optional)

            # stop after the first eligible selector
            break