Exemplo n.º 1
0
    def parse_product(self, response):
        """Yield one request per hardware build, or one item per firmware table.

        If the page has a build drop-down and ``response.meta`` carries no
        "build" yet, every drop-down entry is requested again with the entry
        text stored as the build.  Otherwise every table under
        ``content_firmware`` becomes a firmware item.
        """
        if response.xpath("//dl[@id='dlDropDownBox']") and "build" not in response.meta:
            for option in response.xpath("//dl[@id='dlDropDownBox']//li/a"):
                target = option.xpath("./@href").extract()[0]
                label = option.xpath(".//text()").extract()[0]

                yield Request(
                    url=urlparse.urljoin(response.url, target),
                    meta={"product": response.meta["product"], "build": label},
                    headers={"Referer": response.url},
                    callback=self.parse_product,
                )
            return

        # the first GPL source link on the page (if any) is attached to every item
        gpl_links = response.xpath("//div[@id='content_gpl_code']//a/@href").extract()
        sdk = gpl_links[0] if gpl_links else None

        for table in response.xpath("//div[@id='content_firmware']//table"):
            target = table.xpath("./tbody/tr[1]/th[1]//a/@href").extract()[0]
            label = table.xpath("./tbody/tr[1]/th[1]//a//text()").extract()[0]
            released = table.xpath("./tbody/tr[1]/td[1]//text()").extract()

            loader = FirmwareLoader(
                item=FirmwareImage(),
                response=response,
                date_fmt=["%d/%m/%y"],
            )
            loader.add_value("url", target)
            loader.add_value("date", loader.find_date(released))
            loader.add_value("description", label)
            loader.add_value("product", response.meta["product"])
            # build is only present when we arrived through the drop-down
            loader.add_value("build", response.meta.get("build"))
            loader.add_value("vendor", self.vendor)
            loader.add_value("sdk", sdk)
            yield loader.load_item()
Exemplo n.º 2
0
    def parse_download(self, response):
        """Yield a firmware item for every record in category bucket 7.

        The product name comes from the page header with the trailing
        " Support & Drivers" removed; date and version are both searched in
        the record's ``dateVersion`` list.
        """
        records = response.xpath(
            "//li[@class='categoryBucket categoryBucketId-7']//li[@class='record ']")

        for record in records:
            heading = response.xpath(
                "//div[@class='prodNavHeaderBody']//text()").extract()[0]
            product = heading.replace(" Support & Drivers", "")

            # the same <strong> nodes hold both the date and the version;
            # each helper picks out the part it understands
            date = record.xpath(
                ".//ul[@class='dateVersion']//strong/text()").extract()
            version = record.xpath(
                ".//ul[@class='dateVersion']//strong/text()").extract()

            download = record.xpath(".//a/@href").extract()[0]
            href = download.replace("file-download", "file-redirect")
            text = record.xpath(".//a//text()").extract()[0]

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%b %d, %Y"])
            loader.add_value("url", href)
            loader.add_value("product", product)
            loader.add_value("date", loader.find_date(date))
            loader.add_value("description", text)
            loader.add_value("version", loader.find_version_period(version))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 3
0
    def parse_kb(self, response):
        """Yield firmware items found in the article's download paragraphs.

        Different firmware versions are not clearly separated in the markup,
        so every paragraph is split on ``<br><br>`` and each segment is parsed
        as its own HTML fragment.  Paragraphs and segments are walked in
        reverse order to get the MIB link before the firmware items; the most
        recently seen MIB link is attached to every firmware item that follows.
        """
        mib = None

        for entry in reversed(
                response.xpath(
                    "//div[@id='support-article-downloads']/div/p")):
            for segment in reversed(entry.extract().split("<br><br>")):
                resp = HtmlResponse(url=response.url,
                                    body=segment,
                                    encoding=response.encoding)
                # the segment text is the same for every link in it, so it is
                # extracted once instead of once (or twice) per link
                text = resp.xpath("//text()").extract()

                for href in resp.xpath("//a/@href").extract():
                    if "MIBs" in href:
                        mib = href

                    elif "firmware" in href:
                        item = FirmwareLoader(item=FirmwareImage(),
                                              response=resp,
                                              date_fmt=["%m/%d/%Y"])
                        item.add_value("date", item.find_date(text))
                        # record the link that matched, not every link in the
                        # segment (which may include the MIB link)
                        item.add_value("url", href)
                        item.add_value("mib", mib)
                        item.add_value("product", response.meta["product"])
                        item.add_value("vendor", self.name)
                        item.add_value(
                            "version",
                            FirmwareLoader.find_version_period(text))
                        yield item.load_item()
Exemplo n.º 4
0
    def parse_kb(self, response):
        """Yield firmware items for "cache-www" links in a knowledge-base article.

        The rich-text body is split on the literal "=-" separator and each
        piece is parsed as its own HTML fragment, so the version and date
        lookups only see the text of the region a link belongs to.  The
        version is searched in the text following the link first, then in the
        whole region.
        """
        filtered = response.xpath(
            "//div[@class='sfdc_richtext']").extract()[0].split("=-")

        for entry in filtered:
            entry = entry.strip()
            if not entry:
                # nothing but whitespace between two separators
                continue

            resp = HtmlResponse(url=response.url,
                                body=entry,
                                encoding=response.encoding)

            for link in resp.xpath("//a"):
                hrefs = link.xpath("@href").extract()
                # an <a> without href (e.g. a named anchor) has no target;
                # skip it instead of raising IndexError and losing the rest
                # of the article
                if not hrefs or "cache-www" not in hrefs[0]:
                    continue
                href = hrefs[0]

                text = resp.xpath("//text()").extract()
                text_next = link.xpath("following::text()").extract()

                item = FirmwareLoader(
                    item=FirmwareImage(),
                    response=response,
                    date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

                version = FirmwareLoader.find_version_period(text_next)
                if not version:
                    version = FirmwareLoader.find_version_period(text)

                item.add_value("version", version)
                item.add_value("date", item.find_date(text))
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 5
0
    def parse_kb(self, response):
        """Yield firmware items found in the article's download paragraphs.

        Firmware versions are not clearly separated in the markup, so each
        paragraph is cut at ``<br><br>`` and every piece is parsed as a
        separate HTML fragment.  Iteration runs in reverse order to get the
        MIB link before the firmware items; the last MIB link seen is stored
        on each firmware item yielded afterwards.
        """
        mib = None

        paragraphs = response.xpath(
            "//div[@id='support-article-downloads']/div/p")
        for entry in reversed(paragraphs):
            for segment in reversed(entry.extract().split("<br><br>")):
                resp = HtmlResponse(
                    url=response.url, body=segment, encoding=response.encoding)
                # identical for every link of this segment: extract it once
                text = resp.xpath("//text()").extract()

                for href in resp.xpath("//a/@href").extract():
                    if "MIBs" in href:
                        mib = href
                        continue
                    if "firmware" not in href:
                        continue

                    item = FirmwareLoader(
                        item=FirmwareImage(), response=resp, date_fmt=["%m/%d/%Y"])
                    item.add_value("date", item.find_date(text))
                    # use the link being examined; re-querying "//a/@href"
                    # would add every link of the segment, MIB link included
                    item.add_value("url", href)
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    item.add_value(
                        "version", FirmwareLoader.find_version_period(text))
                    yield item.load_item()
Exemplo n.º 6
0
    def parse_product(self, response):
        """Yield a firmware item for each firmware download entry on the page.

        Download sections are identified by a type id in their div id:
        firmware = 20, GPL source = 30, BIOS = 3.  When a firmware version is
        found, the GPL source entries are searched for the same version string
        and the matching download link is stored as the item's "sdk".
        """
        firmware_entries = response.xpath(
            "//div[@id='div_type_20']/div[@id='download-os-answer-table']")

        for block in firmware_entries:
            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%Y/%m/%d"])

            heading = block.xpath("./p//text()").extract()
            version = FirmwareLoader.find_version_period(heading)

            # only the first download link is used (e.g. DLM instead of
            # global or p2p)
            url = block.xpath("./table//tr[3]//a/@href").extract()[0]

            # look for a source-code entry mentioning the same version; when
            # several match, the last one wins
            sdk = None
            if version:
                for candidate in response.xpath(
                        "//div[@id='div_type_30']/div[@id='download-os-answer-table']"):
                    candidate_heading = "".join(
                        candidate.xpath("./p//text()").extract())
                    if version in candidate_heading:
                        sdk = candidate.xpath(
                            "./table//tr[3]//a/@href").extract()[0]

            released = block.xpath("./table//tr[2]/td[1]//text()").extract()
            summary = " ".join(
                block.xpath("./table//tr[1]//td[1]//text()").extract())

            loader.add_value("version", version)
            loader.add_value("date", loader.find_date(released))
            loader.add_value("description", summary)
            loader.add_value("url", url)
            loader.add_value("sdk", sdk)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 7
0
    def parse_kb(self, response):
        """Yield firmware items for "cache-www" links in a knowledge-base article.

        The article body is tokenized by splitting its HTML on the literal
        "=-" sequence; each region is then parsed on its own so that dates and
        versions are looked up only in the text around a link.  The text after
        the link is searched for a version first, the whole region second.
        """
        filtered = response.xpath(
            "//div[@class='sfdc_richtext']").extract()[0].split("=-")

        for entry in (piece.strip() for piece in filtered):
            if not entry:
                # empty region between two separators: no links to find
                continue

            resp = HtmlResponse(url=response.url, body=entry,
                                encoding=response.encoding)

            # select only anchors that carry an href; anchors without one
            # previously raised IndexError and aborted the whole callback
            for link in resp.xpath("//a[@href]"):
                href = link.xpath("@href").extract()[0]
                if "cache-www" in href:
                    text = resp.xpath("//text()").extract()
                    text_next = link.xpath("following::text()").extract()

                    item = FirmwareLoader(item=FirmwareImage(),
                                          response=response,
                                          date_fmt=["%b %d, %Y", "%B %d, %Y",
                                                    "%m/%d/%Y"])

                    version = FirmwareLoader.find_version_period(text_next)
                    if not version:
                        version = FirmwareLoader.find_version_period(text)

                    item.add_value("version", version)
                    item.add_value("date", item.find_date(text))
                    item.add_value("url", href)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    yield item.load_item()
Exemplo n.º 8
0
    def parse_url(self, response):
        """Walk a directory listing: recurse into folders, yield firmware files.

        Folder names (minus the trailing "/") are appended to the product
        carried in ``response.meta``, joined with "-".  Folders whose link
        text contains "package/" and links containing ".." are ignored.
        """
        firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for anchor in response.xpath("//a"):
            label = anchor.xpath("text()").extract()[0]
            target = anchor.xpath("@href").extract()[0]

            if ".." in target:
                continue

            if target.endswith('/'):
                if "package/" in label:
                    continue

                # drop the trailing slash shown in the listing
                folder = label[:-1]
                if "product" in response.meta:
                    product = "%s-%s" % (response.meta["product"], folder)
                else:
                    product = folder

                yield Request(
                    url=urlparse.urljoin(response.url, target),
                    headers={"Referer": response.url},
                    meta={"version": response.meta["version"],
                          "product": product},
                    callback=self.parse_url)

            elif target.endswith(firmware_suffixes):
                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
                loader.add_value("version", response.meta["version"])
                loader.add_value("url", target)
                # the date is searched in the text that follows the link
                trailing = anchor.xpath("following::text()").extract()
                loader.add_value("date", loader.find_date(trailing))
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 9
0
    def parse_url(self, response):
        """Crawl a directory listing, descending into folders and emitting images.

        Each folder's link text without its last character is used as (or
        appended with "-" to) the product name passed on in ``meta``.  Links
        with ".." in them, and folders whose text contains "package/", are
        skipped.
        """
        image_extensions = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for link in response.xpath("//a"):
            name = link.xpath("text()").extract()[0]
            url = link.xpath("@href").extract()[0]

            if ".." in url:
                continue

            is_folder = url.endswith('/')
            if is_folder and "package/" not in name:
                product = name[0:-1]
                if "product" in response.meta:
                    product = "%s-%s" % (response.meta["product"], product)

                next_meta = {
                    "version": response.meta["version"],
                    "product": product,
                }
                yield Request(url=urlparse.urljoin(response.url, url),
                              headers={"Referer": response.url},
                              meta=next_meta,
                              callback=self.parse_url)
            elif not is_folder and url.endswith(image_extensions):
                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%d-%b-%Y"])
                item.add_value("version", response.meta["version"])
                item.add_value("url", url)
                # the date is looked up in the text after the link
                after_link = link.xpath("following::text()").extract()
                item.add_value("date", item.find_date(after_link))
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 10
0
    def parse(self, response):
        """Follow navigation links and yield items for firmware download links.

        Links without "_a=download" in their href are followed, with the link
        text split on spaces passed on as "product".  Download links are
        turned into items only when their text mentions "firmware" or "f/w".
        """
        cells = response.xpath(
            "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]")

        for cell in cells:
            # version and date are searched in the text of the whole cell
            desc = cell.xpath(".//text()").extract()

            for anchor in cell.xpath(".//a"):
                href = anchor.xpath("./@href").extract()[0]
                text = anchor.xpath(".//text()").extract()[0]

                if "_a=download" not in href:
                    yield Request(url=urllib.parse.urljoin(response.url, href),
                                  headers={"Referer": response.url},
                                  meta={"product": text.strip().split(' ')},
                                  callback=self.parse)
                    continue

                lowered = text.lower()
                if "firmware" not in lowered and "f/w" not in lowered:
                    continue

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%m/%d/%Y", "%m/%d/%y"])
                loader.add_value("version", FirmwareLoader.find_version(desc))
                loader.add_value("date", loader.find_date(desc))
                loader.add_value("description", text)
                loader.add_value("url", href)
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 11
0
    def parse_product(self, response):
        """Yield the firmware item described by the page's first download table.

        The title cell (row 1) is expected to look like "<product>_<build>" or
        "<product> <rest>"; the date is read from row 4 and the download link
        from row 5.  If the title contains neither "_" nor " ", product and
        build stay ``None``.
        """
        title = response.xpath("//div[@class='download']/table[1]//tr[1]/td[2]//text()").extract()[0]
        date = response.xpath("//div[@class='download']/table[1]//tr[4]/td[2]//text()").extract()
        href = response.xpath("//div[@class='download']/table[1]//tr[5]/td[2]/a/@href").extract()[0]

        # Strip non-ASCII characters but decode back to text: on Python 3 the
        # encoded value is bytes, and `"_" in text` / `text.split("_")` with
        # str operands would raise TypeError.
        text = title.encode("ascii", errors="ignore").decode("ascii")
        # NOTE(review): description and url are still passed as UTF-8 encoded
        # values, as before - confirm that FirmwareLoader expects that.
        desc = title.encode("utf-8")

        build = None
        product = None
        if "_" in text:
            parts = text.split("_")
            product = parts[0]
            build = parts[1]
        elif " " in text:
            product = text.split(" ")[0]

        item = FirmwareLoader(item=FirmwareImage(), response=response, date_fmt=["%Y/%m/%d"])
        item.add_value("url", href.encode("utf-8"))
        item.add_value("date", item.find_date(date))
        item.add_value("description", desc)
        item.add_value("build", build)
        item.add_value("product", product)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
Exemplo n.º 12
0
    def parse(self, response):
        """Walk a tabular directory listing: recurse into folders, yield images.

        The link in the second column of each row is examined.  For folders,
        the link text becomes the product if none is set yet, otherwise the
        build (with the word "build" removed) if none is set yet; both travel
        in ``meta``.  Files with a known firmware extension become items whose
        version is searched in the "-"-separated parts of the file name.
        """
        firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for link in response.xpath("//table//tr"):
            if not link.xpath("./td[2]/a"):
                continue

            text = link.xpath("./td[2]/a/text()").extract()[0]
            href = link.xpath("./td[2]//@href").extract()[0]

            if ".." in href:
                continue
            elif href.endswith("/"):
                build = response.meta.get("build")
                product = response.meta.get("product")

                if not product:
                    product = text
                elif not build:
                    build = text.replace("build", "")

                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"build": build, "product": product},
                    callback=self.parse,
                )
            elif href.endswith(firmware_suffixes):
                name_parts = os.path.splitext(text)[0].split("-")

                item = FirmwareLoader(item=FirmwareImage(), response=response, date_fmt=["%Y-%m-%d"])
                # read meta with .get() like the folder branch does: a file
                # listed before any folder was entered has no build/product
                # yet and used to raise KeyError, aborting the whole page
                item.add_value("build", response.meta.get("build"))
                item.add_value("url", href)
                item.add_value("version", FirmwareLoader.find_version_period(name_parts))
                item.add_value("date", item.find_date(link.xpath("./td[3]/text()").extract()))
                item.add_value("product", response.meta.get("product"))
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 13
0
    def parse_product(self, response):
        """Yield the firmware item described by the page's first download table.

        Row 1 holds the title, row 4 the date and row 5 the download link.
        A title of the form "<product>_<build>" sets both fields; a title with
        a space sets only the product (its first word); otherwise both remain
        ``None``.
        """
        title = response.xpath(
            "//div[@class='download']/table[1]//tr[1]/td[2]//text()").extract(
            )[0]
        date = response.xpath(
            "//div[@class='download']/table[1]//tr[4]/td[2]//text()").extract(
            )
        href = response.xpath(
            "//div[@class='download']/table[1]//tr[5]/td[2]/a/@href").extract(
            )[0]

        # Remove non-ASCII characters, then decode so `text` is a text string
        # again; on Python 3 the encoded bytes cannot be tested or split with
        # the str separators used below (TypeError).
        text = title.encode("ascii", errors="ignore").decode("ascii")
        # NOTE(review): url and description are still handed over UTF-8
        # encoded, unchanged from before - confirm FirmwareLoader expects it.
        desc = title.encode("utf-8")

        build = None
        product = None
        if "_" in text:
            fields = text.split("_")
            product, build = fields[0], fields[1]
        elif " " in text:
            product = text.split(" ")[0]

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%Y/%m/%d"])
        item.add_value("url", href.encode("utf-8"))
        item.add_value("date", item.find_date(date))
        item.add_value("description", desc)
        item.add_value("build", build)
        item.add_value("product", product)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
Exemplo n.º 14
0
    def parse_product(self, response):
        """Yield the firmware item of a product page that has a Firmware tab.

        Pages without a link to "#Firmware" produce nothing.  Otherwise the
        product name, the first firmware download link and its date are read
        from the page; product and version come from ``response.meta``.
        """
        # The tab is linked as <a href="#Firmware"><span>Firmware</span></a>.
        # Without it there is nothing to extract: stop here.  (Previously a
        # None was yielded and execution fell through to the [0] lookups
        # below.)
        if not response.xpath("//a[@href=\"#Firmware\"]").extract():
            return

        description = response.xpath(
            "//div[@class=\"product-name\"]//strong/text()").extract()[0]
        url = response.xpath(
            "//*[@id=\"content_Firmware\"]/table/tbody/tr[1]/th/a/@href"
        ).extract()[0]
        date = response.xpath(
            "//*[@id=\"content_Firmware\"]/table/tbody/tr[2]/td[1]/span[2]/text()"
        ).extract()[0]

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%d/%m/%y"])

        item.add_value("url", url)
        item.add_value("date", item.find_date(date))
        item.add_value("description", description)
        item.add_value("product", response.meta["product"])
        item.add_value("version", response.meta["version"])
        item.add_value("vendor", self.vendor)
        yield item.load_item()
Exemplo n.º 15
0
    def parse_download(self, response):
        """Yield a firmware item for each download table that mentions firmware.

        The version is searched in the first "maindescription" entry and the
        date in the second.  The download URL is the second quote-delimited
        field of the link's ``onclick`` handler, with a fixed confirmation
        query string appended.
        """
        for table in response.xpath("//div[@class='downloadtable']"):
            whole_text = " ".join(table.xpath(".//text()").extract())
            if "firmware" not in whole_text.lower():
                continue

            text = table.xpath(
                ".//li[@class='maindescription' and position() = 1]//text()"
            ).extract()
            date = table.xpath(
                ".//li[@class='maindescription' and position() = 2]//text()"
            ).extract()

            onclick = table.xpath(
                ".//li[@class='maindescription']//a/@onclick"
            ).extract()[0]
            href = onclick.split('\'')[1] + "&button=Continue+with+Download&Continue=yes"

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%m/%d/%Y"])
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("date", loader.find_date(date))
            loader.add_value("version", FirmwareLoader.find_version(text))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 16
0
    def parse(self, response):
        """Follow navigation links and yield items for firmware download links.

        A link whose href lacks "_a=download" is requested again through this
        callback, with its text split on spaces stored as "product".  A
        download link yields an item only if its text contains "firmware" or
        "f/w" (case-insensitive).
        """
        for cell in response.xpath(
                "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]"):
            # the whole cell text is the haystack for version and date
            desc = cell.xpath(".//text()").extract()

            for anchor in cell.xpath(".//a"):
                href = anchor.xpath("./@href").extract()[0]
                text = anchor.xpath(".//text()").extract()[0]

                is_download = "_a=download" in href
                if not is_download:
                    yield Request(
                        url=urlparse.urljoin(response.url, href),
                        headers={"Referer": response.url},
                        meta={"product": text.strip().split(' ')},
                        callback=self.parse)
                    continue

                label = text.lower()
                if not ("firmware" in label or "f/w" in label):
                    continue

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%m/%d/%Y", "%m/%d/%y"])
                loader.add_value("version", FirmwareLoader.find_version(desc))
                loader.add_value("date", loader.find_date(desc))
                loader.add_value("description", text)
                loader.add_value("url", href)
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 17
0
    def parse_product(self, response):
        """Yield one firmware item per firmware download entry.

        Sections are told apart by the type id in their div id (firmware = 20,
        GPL source = 30, BIOS = 3).  If a version is found for a firmware
        entry, the download link of the GPL source entry naming that version
        is stored as "sdk".
        """
        for firmware in response.xpath(
                "//div[@id='div_type_20']/div[@id='download-os-answer-table']"):
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response, date_fmt=["%Y/%m/%d"])

            version = FirmwareLoader.find_version_period(
                firmware.xpath("./p//text()").extract())

            # take the first download link (e.g. DLM instead of global or p2p)
            download = firmware.xpath("./table//tr[3]//a/@href").extract()[0]

            # try to pair the firmware with a source-code entry; every source
            # entry is checked, so the last match is the one kept
            source_link = None
            sources = response.xpath(
                "//div[@id='div_type_30']/div[@id='download-os-answer-table']"
            ) if version else []
            for source in sources:
                title = "".join(source.xpath("./p//text()").extract())
                if version in title:
                    source_link = source.xpath(
                        "./table//tr[3]//a/@href").extract()[0]

            date_cell = firmware.xpath("./table//tr[2]/td[1]//text()").extract()
            title_cell = firmware.xpath("./table//tr[1]//td[1]//text()").extract()

            item.add_value("version", version)
            item.add_value("date", item.find_date(date_cell))
            item.add_value("description", " ".join(title_cell))
            item.add_value("url", download)
            item.add_value("sdk", source_link)
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()
Exemplo n.º 18
0
    def parse(self, response):
        """Handle both license-agreement pages and download listing pages.

        On a page with the "UCagreement" form, every quote-delimited piece of
        the ``productAndDoc`` markup that contains "downloads.polycom.com" is
        emitted as an item, with all other fields taken from ``response.meta``.

        On a page with a ``ContentChannel`` div, each list entry with a link
        is either skipped (documentation, EULAs, desktop software, PDFs),
        followed (HTML pages) or emitted as an item (anything else with a
        non-empty URL path).
        """
        if response.xpath("//form[@name='UCagreement']"):
            markup = response.xpath("//div[@id='productAndDoc']").extract()[0]
            for piece in markup.split('"'):
                if "downloads.polycom.com" not in piece:
                    continue

                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%B %d, %Y"])
                loader.add_value("version", response.meta["version"])
                loader.add_value("url", piece.encode("utf-8"))
                loader.add_value("date", response.meta["date"])
                loader.add_value("description", response.meta["description"])
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
            return

        if not response.xpath("//div[@id='ContentChannel']"):
            return

        # link texts containing any of these are not firmware
        ignored_words = ("end user license agreement", "eula", "release notes",
                         "mac os", "windows", "guide", "(pdf)", "sample")

        for entry in response.xpath("//div[@id='ContentChannel']//li"):
            if not entry.xpath("./a"):
                continue

            text = entry.xpath("./a//text()").extract()[0]
            href = entry.xpath("./a/@href").extract()[0].strip()
            date = entry.xpath("./span//text()").extract()

            path = urlparse.urlparse(href).path
            lowered = text.lower()

            if any(word in lowered for word in ignored_words) or href.endswith(".pdf"):
                continue

            if path.endswith((".htm", ".html")) or "(html)" in lowered:
                # intermediate page: carry what is known so far along
                carried = {
                    "product": response.meta.get("product", text),
                    "date": date,
                    "version": FirmwareLoader.find_version_period([text]),
                    "description": text,
                }
                yield Request(
                    url=urlparse.urljoin(
                        response.url, PolycomSpider.fix_url(href)),
                    meta=carried,
                    headers={"Referer": response.url},
                    callback=self.parse)
            elif path:
                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%B %d, %Y"])
                loader.add_value(
                    "version", FirmwareLoader.find_version_period([text]))
                loader.add_value("url", href.encode("utf-8"))
                loader.add_value("date", loader.find_date(date))
                loader.add_value("description", text)
                # NOTE: "product" is not set for direct downloads
                loader.add_value("vendor", self.name)
                yield loader.load_item()
Exemplo n.º 19
0
    def parse_product(self, response):
        """Yield a firmware item for every ``.npk`` / ``.lzb`` link on the page.

        The product is the file name up to its last "-"; date and version are
        searched in the text of the whole page.
        """
        packages = [href for href in response.xpath("//a/@href").extract()
                    if href.endswith((".npk", ".lzb"))]
        if not packages:
            return

        # the page text does not depend on the link, so extract it once
        # rather than once per package
        text = response.xpath("//text()").extract()

        for href in packages:
            basename = href.split("/")[-1]

            product, dash, _ = basename.rpartition("-")
            if not dash:
                # No "-" in the name: slicing up to rfind() == -1 used to chop
                # off the last character; use the name without its extension.
                product = basename.rsplit(".", 1)[0]

            item = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%Y-%b-%d"])
            item.add_value("date", item.find_date(text))
            item.add_value("url", href)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            item.add_value(
                "version", FirmwareLoader.find_version_period(text))
            yield item.load_item()
Exemplo n.º 20
0
    def parse_product(self, response):
        """Yield a firmware item for each link ending in ``.npk`` or ``.lzb``.

        Date and version are looked up in the full page text; the product is
        the part of the file name before its last "-".
        """
        text = None

        for href in response.xpath("//a/@href").extract():
            if not href.endswith((".npk", ".lzb")):
                continue

            if text is None:
                # same for every link: extract on first use only
                text = response.xpath("//text()").extract()

            basename = href.split("/")[-1]
            dash = basename.rfind("-")
            if dash >= 0:
                product = basename[:dash]
            else:
                # rfind() returned -1; slicing with it would silently drop
                # the last character, so fall back to the bare file name
                # without its extension
                product = basename.rsplit(".", 1)[0]

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y-%b-%d"])
            item.add_value("date", item.find_date(text))
            item.add_value("url", href)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            item.add_value("version",
                           FirmwareLoader.find_version_period(text))
            yield item.load_item()
Exemplo n.º 21
0
    def parse(self, response):
        """Walk a directory listing: recurse into folders, yield ``.gz`` images.

        For each ``.gz`` link whose href does not contain ".iso", the link
        text is stripped of its file extensions and split on "-".  The version
        is searched in those parts; parts containing "BETA" (plus the part
        after it) or "RC" are appended to the version, and those parts, the
        version itself and any part containing "RELEASE" are removed from the
        product name.
        """
        for link in response.xpath("//a"):
            text = link.xpath(".//text()").extract()[0]
            href = link.xpath(".//@href").extract()[0]

            if ".." in href:
                continue
            elif href.endswith('/'):
                yield Request(url=urlparse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              callback=self.parse)
            elif href.endswith(".gz") and ".iso" not in href:
                # strip off multiple file extensions
                basename = os.path.splitext(text)[0]
                while ".img" in basename or ".iso" in basename:
                    stripped = os.path.splitext(basename)[0]
                    if stripped == basename:
                        # splitext() found nothing to remove (e.g. the name
                        # starts with the dot); stop instead of looping forever
                        break
                    basename = stripped

                parts = basename.split("-")
                version = FirmwareLoader.find_version_period(parts)

                # attempt to parse filename and generate product/version
                # strings
                remove = [version] if version else []
                tags = []
                for i, part in enumerate(parts):
                    if "BETA" in part:
                        # the part after BETA is glued to it, if there is one
                        following = parts[i + 1] if i + 1 < len(parts) else ""
                        tags.append(part + following)
                        remove.append(part)
                        if following:
                            remove.append(following)
                    elif "RC" in part:
                        tags.append(part)
                        remove.append(part)
                    elif "RELEASE" in part:
                        remove.append(part)

                if tags:
                    # version may be None when no version number was found;
                    # build the string from whatever pieces exist
                    version = "-".join(([version] if version else []) + tags)

                product_parts = [x for x in parts if x not in remove]

                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%d-%b-%Y"])
                item.add_value("version", version)
                item.add_value("url", href)
                item.add_value(
                    "date",
                    item.find_date(link.xpath("following::text()").extract()))
                item.add_value("product", "-".join(product_parts))
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 22
0
    def parse(self, response):
        """Crawl a directory listing, following folders and emitting ``.gz`` images.

        A ``.gz`` link (href without ".iso") has its link text reduced to a
        base name and split on "-".  The version found in those components is
        extended with any "BETA" component (joined with the component after
        it) and any "RC" component; these, the version and "RELEASE"
        components are dropped and the remainder forms the product name.
        """
        for link in response.xpath("//a"):
            text = link.xpath(".//text()").extract()[0]
            href = link.xpath(".//@href").extract()[0]

            if ".." in href:
                continue
            elif href.endswith('/'):
                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    callback=self.parse)
            elif href.endswith(".gz") and ".iso" not in href:
                # strip off multiple file extensions
                basename = os.path.splitext(text)[0]
                while ".img" in basename or ".iso" in basename:
                    shorter = os.path.splitext(basename)[0]
                    if shorter == basename:
                        # nothing left that splitext() will remove; bail out
                        # rather than spin forever
                        break
                    basename = shorter

                components = basename.split("-")
                version = FirmwareLoader.find_version_period(components)

                # attempt to parse filename and generate product/version
                # strings
                remove = [version] if version else []
                suffixes = []
                count = len(components)
                for i, component in enumerate(components):
                    if "BETA" in component:
                        suffix = component
                        remove.append(component)
                        # append the next component only when it exists; a
                        # trailing BETA used to raise IndexError
                        if i + 1 < count:
                            suffix += components[i + 1]
                            remove.append(components[i + 1])
                        suffixes.append(suffix)
                    elif "RC" in component:
                        suffixes.append(component)
                        remove.append(component)
                    elif "RELEASE" in component:
                        remove.append(component)

                if suffixes:
                    # when no version was found (None) the suffixes alone
                    # form the version instead of raising TypeError on +=
                    pieces = [version] if version else []
                    version = "-".join(pieces + suffixes)

                product = "-".join(x for x in components if x not in remove)

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
                item.add_value("version", version)
                item.add_value("url", href)
                item.add_value("date", item.find_date(
                    link.xpath("following::text()").extract()))
                item.add_value("product", product)
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 23
0
    def parse(self, response):
        """Walk a table-based directory listing and emit firmware items.

        Each table row whose second cell holds a link is inspected.
        Directory links (ending in ``/``) are followed with this same
        callback; the text of the first directory level visited is carried
        along as ``product`` and that of the next level (with the word
        "build" removed) as ``build``. Links ending in a known firmware
        extension become items.

        Args:
            response: listing page; ``response.meta`` may carry ``product``
                and ``build`` from an enclosing directory.

        Yields:
            ``Request`` objects for sub-directories and loaded
            ``FirmwareImage`` items for firmware files.
        """
        firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

        for link in response.xpath("//table//tr"):
            if not link.xpath("./td[2]/a"):
                continue

            texts = link.xpath("./td[2]/a/text()").extract()
            hrefs = link.xpath("./td[2]//@href").extract()

            # a link without text or href gives nothing to parse or follow
            if not texts or not hrefs:
                continue

            text, href = texts[0], hrefs[0]

            if ".." in href:
                continue
            elif href.endswith('/'):
                build = response.meta.get("build", None)
                product = response.meta.get("product", None)

                if not product:
                    product = text
                elif not build:
                    build = text.replace("build", "")

                yield Request(url=urlparse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              meta={
                                  "build": build,
                                  "product": product
                              },
                              callback=self.parse)
            elif href.endswith(firmware_suffixes):
                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%Y-%m-%d"])
                # .get() mirrors the directory branch: a file listed on a
                # page reached without product/build meta must not raise
                item.add_value("build", response.meta.get("build", None))
                item.add_value("url", href)
                item.add_value(
                    "version",
                    FirmwareLoader.find_version_period(
                        os.path.splitext(text)[0].split("-")))
                item.add_value(
                    "date",
                    item.find_date(link.xpath("./td[3]/text()").extract()))
                item.add_value("product", response.meta.get("product", None))
                item.add_value("vendor", self.name)
                yield item.load_item()
Exemplo n.º 24
0
    def parse_product(self, response):
        """Yield a firmware item for each firmware tab of a product page.

        The tab labels under ``ul#normaltab2`` are scanned for the word
        "firmware" (case-insensitive). For each match, the first row of every
        table inside the ``div#normalcon2`` child at the same position
        supplies the version (column 2), date (column 4) and download link
        (column 5).

        Args:
            response: product page; ``response.meta["product"]`` must hold
                the product name.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        tab_count = len(response.xpath("//ul[@id='normaltab2']//a"))

        # XPath positions are 1-based
        for position in range(1, tab_count + 1):
            label = "".join(response.xpath(
                "//ul[@id='normaltab2']/li[%d]/a//text()" % position).extract())
            if "firmware" not in label.lower():
                continue

            for entry in response.xpath(
                    "//div[@id='normalcon2']/div[%d]//table/tr[1]" % position):
                hrefs = entry.xpath("./td[5]//a/@href").extract()
                if not hrefs:
                    # a row without a download link must not abort the
                    # remaining rows and tabs
                    continue

                version = entry.xpath("./td[2]//text()").extract()
                date = entry.xpath("./td[4]//text()").extract()

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response,
                    date_fmt=["%Y-%m-%d"])
                item.add_value(
                    "version", FirmwareLoader.find_version_period(version))
                item.add_value("url", hrefs[0])
                item.add_value("date", item.find_date(date))
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.vendor)
                yield item.load_item()
Exemplo n.º 25
0
    def parse_product(self, response):
        """Collect firmware downloads from the tabbed area of a product page.

        Every tab under ``ul#normaltab2`` whose label contains "firmware"
        (case-insensitive) is matched with the ``div#normalcon2`` child at
        the same position; the first row of each table in it provides the
        version (column 2), the date (column 4) and the download link
        (column 5).

        Args:
            response: product page; ``response.meta["product"]`` must hold
                the product name.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        tabs = response.xpath("//ul[@id='normaltab2']//a")

        for i in range(0, len(tabs)):
            # XPath positions are 1-based
            nth = i + 1
            title = "".join(response.xpath(
                "//ul[@id='normaltab2']/li[%d]/a//text()" % nth).extract())

            if "firmware" in title.lower():
                for entry in response.xpath(
                        "//div[@id='normalcon2']/div[%d]//table/tr[1]" % nth):
                    links = entry.xpath("./td[5]//a/@href").extract()
                    if not links:
                        # no download link in this row: skip it instead of
                        # raising IndexError and losing the other rows
                        continue

                    version = entry.xpath("./td[2]//text()").extract()
                    date = entry.xpath("./td[4]//text()").extract()

                    item = FirmwareLoader(
                        item=FirmwareImage(), response=response,
                        date_fmt=["%Y-%m-%d"])
                    item.add_value(
                        "version",
                        FirmwareLoader.find_version_period(version))
                    item.add_value("url", links[0])
                    item.add_value("date", item.find_date(date))
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.vendor)
                    yield item.load_item()
Exemplo n.º 26
0
    def parse_product(self, response):
        """Expand a product page into per-build pages, or scrape its firmware.

        When the page has a ``dl#dlDropDownBox`` element and no ``build`` has
        been chosen yet (no ``build`` key in ``response.meta``), each entry of
        that element is requested with this same callback and the entry text
        stored as ``build``. Otherwise one item is produced per table inside
        ``div#content_firmware``; the first link found inside
        ``div#content_gpl_code`` (if any) is attached to every item as
        ``sdk``.

        Args:
            response: product page; ``response.meta["product"]`` must be set.

        Yields:
            ``Request`` objects for the per-build pages, or loaded
            ``FirmwareImage`` items.
        """
        has_dropdown = bool(response.xpath("//dl[@id='dlDropDownBox']"))

        if has_dropdown and "build" not in response.meta:
            for option in response.xpath("//dl[@id='dlDropDownBox']//li/a"):
                target = option.xpath("./@href").extract()[0]
                label = option.xpath(".//text()").extract()[0]

                yield Request(
                    url=urlparse.urljoin(response.url, target),
                    meta={"product": response.meta["product"],
                          "build": label},
                    headers={"Referer": response.url},
                    callback=self.parse_product)
            return

        # the first GPL code link, when there is one, serves as the SDK url
        gpl_links = response.xpath(
            "//div[@id='content_gpl_code']//a/@href").extract()
        sdk = gpl_links[0] if gpl_links else None

        for table in response.xpath("//div[@id='content_firmware']//table"):
            url = table.xpath("./tbody/tr[1]/th[1]//a/@href").extract()[0]
            title = table.xpath(
                "./tbody/tr[1]/th[1]//a//text()").extract()[0]
            released = table.xpath("./tbody/tr[1]/td[1]//text()").extract()

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%d/%m/%y"])
            loader.add_value("url", url)
            loader.add_value("date", loader.find_date(released))
            loader.add_value("description", title)
            loader.add_value("product", response.meta["product"])
            loader.add_value("build", response.meta.get("build"))
            loader.add_value("vendor", self.vendor)
            loader.add_value("sdk", sdk)
            yield loader.load_item()
Exemplo n.º 27
0
    def parse(self, response):
        """Emit an item for every ``.npk`` / ``.lzb`` link on the page.

        The version is the path component that precedes the filename and the
        product is the filename up to its last dash. The release date is
        searched for in the text of the whole page.

        Args:
            response: download page; its ``xpath`` method is used.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        # text of the whole page; identical for every link, so it is
        # extracted once, on first use
        page_text = None

        for href in response.xpath("//a/@href").extract():
            if not href.endswith((".npk", ".lzb")):
                continue

            # protocol-relative link
            if href.startswith("//"):
                href = "http:" + href

            if page_text is None:
                page_text = response.xpath("//text()").extract()

            items = href.split('/')
            basename = items[-1]
            # a bare filename has no preceding path component to read the
            # version from
            version = items[-2] if len(items) > 1 else None

            # slicing with rfind("-") would cut off the last character when
            # the name contains no dash; fall back to the name without its
            # extension in that case
            product, dash, _ = basename.rpartition("-")
            if not dash:
                product = basename.rsplit(".", 1)[0]

            item = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%Y-%b-%d"])
            item.add_value("date", item.find_date(page_text))
            item.add_value("url", href)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            item.add_value("version", version)
            yield item.load_item()
Exemplo n.º 28
0
    def parse(self, response):
        """Turn each ``.npk`` / ``.lzb`` link of the page into a firmware item.

        For a link such as ``.../<version>/<product>-<rest>.npk`` the path
        component before the filename becomes the version and the part of the
        filename before its last dash becomes the product. The date is looked
        up in the text of the whole page.

        Args:
            response: download page; its ``xpath`` method is used.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        text = None  # page text, extracted lazily and shared by all links

        for href in response.xpath("//a/@href").extract():
            if href.endswith((".npk", ".lzb")):
                if href.startswith("//"):
                    # protocol-relative link
                    href = "http:" + href

                if text is None:
                    text = response.xpath("//text()").extract()

                items = href.split('/')
                basename = items[-1]

                # guard against a bare filename, which has no directory
                # component holding the version
                version = items[-2] if len(items) > 1 else None

                # basename[0:rfind("-")] silently drops the last character
                # when there is no dash (rfind returns -1); use the name
                # without its extension instead
                dash = basename.rfind("-")
                if dash == -1:
                    product = basename.rsplit(".", 1)[0]
                else:
                    product = basename[:dash]

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response,
                    date_fmt=["%Y-%b-%d"])
                item.add_value("date", item.find_date(text))
                item.add_value("url", href)
                item.add_value("product", product)
                item.add_value("vendor", self.name)
                item.add_value("version", version)
                yield item.load_item()
Exemplo n.º 29
0
    def parse_product(self, response):
        """Yield the firmware item advertised on a product page, if any.

        A page without a link to ``#Firmware`` has no firmware section and
        produces nothing. Otherwise the download link and date are read from
        the first table of the ``content_Firmware`` element and the
        description from the product name heading.

        Args:
            response: product page; ``response.meta`` must hold ``product``
                and ``version``.

        Yields:
            At most one loaded ``FirmwareImage`` item.
        """
        # <a href="#Firmware"><span>Firmware</span></a>
        if not response.xpath("//a[@href=\"#Firmware\"]").extract():
            # stop here: falling through would index into empty results
            return

        description = response.xpath("//div[@class=\"product-name\"]//strong/text()").extract()[0]
        url = response.xpath("//*[@id=\"content_Firmware\"]/table/tbody/tr[1]/th/a/@href").extract()[0]
        date = response.xpath("//*[@id=\"content_Firmware\"]/table/tbody/tr[2]/td[1]/span[2]/text()").extract()[0]

        item = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%d/%m/%y"])

        item.add_value("url", url)
        item.add_value("date", item.find_date(date))
        item.add_value("description", description)
        item.add_value("product", response.meta["product"])
        item.add_value("version", response.meta["version"])
        item.add_value("vendor", self.vendor)
        yield item.load_item()
Exemplo n.º 30
0
    def parse_product(self, response):
        """Yield one firmware item per download row of the product table.

        Every table row after the first inside ``div.main_data_block`` is
        read: column 1 holds the description (its third text node is used),
        column 2 the edition string from which version and build are parsed,
        column 4 the date and column 5 the download link. Rows without a
        link are ignored.

        Args:
            response: product page; ``response.meta["product"]`` must be set.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        rows = response.xpath(
            "//div[@class='main_data_block']//table/tr[position() > 1]")

        for row in rows:
            name_nodes = row.xpath("./td[1]//text()").extract()
            edition = row.xpath("./td[2]//text()").extract()
            released = row.xpath("./td[4]//text()").extract()
            links = row.xpath("./td[5]//a/@href").extract()

            if not links:
                continue

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%Y/%m/%d"])
            loader.add_value(
                "version", FirmwareLoader.find_version_period(edition))
            loader.add_value("build", FirmwareLoader.find_build(edition))
            loader.add_value("url", links[0])
            loader.add_value("date", loader.find_date(released))
            loader.add_value("description", name_nodes[2].strip())
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 31
0
    def parse_download(self, response):
        """Yield an item for each download table that mentions firmware.

        A ``div.downloadtable`` is considered only when its text contains
        "firmware" (case-insensitive). Version and date are read from its
        ``li.maindescription`` elements, and the download url is the second
        quote-delimited piece of the link's ``onclick`` attribute with the
        "continue with download" parameters appended.

        Args:
            response: download page; ``response.meta["product"]`` must be set.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        confirm = "&button=Continue+with+Download&Continue=yes"

        for entry in response.xpath("//div[@class='downloadtable']"):
            summary = " ".join(entry.xpath(".//text()").extract())
            if "firmware" not in summary.lower():
                continue

            text = entry.xpath(
                ".//li[@class='maindescription' and position() = 1]//text()"
            ).extract()
            date = entry.xpath(
                ".//li[@class='maindescription' and position() = 2]//text()"
            ).extract()
            onclick = entry.xpath(
                ".//li[@class='maindescription']//a/@onclick").extract()[0]
            # the url is the first quoted argument of the onclick handler
            href = onclick.split("'")[1] + confirm

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%m/%d/%Y"])
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("date", loader.find_date(date))
            loader.add_value("version", FirmwareLoader.find_version(text))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
Exemplo n.º 32
0
    def parse_download(self, response):
        """Yield an item for each record in the page's category-7 bucket.

        The product name is the first text node of ``div.prodNavHeaderBody``
        with the " Support & Drivers" suffix removed. For each record, the
        ``<strong>`` texts of its ``ul.dateVersion`` list are searched for
        both the date and the version, and its first link (rewritten from
        ``file-download`` to ``file-redirect``) is the download url.

        Args:
            response: support page; its ``xpath`` method is used.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        # the heading belongs to the page, not to a record: read it once
        headings = response.xpath(
            "//div[@class='prodNavHeaderBody']//text()").extract()
        product = (headings[0].replace(" Support & Drivers", "")
                   if headings else None)

        for firmware in response.xpath(
                "//li[@class='categoryBucket categoryBucketId-7']"
                "//li[@class='record ']"):
            hrefs = firmware.xpath(".//a/@href").extract()
            if not hrefs:
                # a record without a link has nothing to download; skip it
                # rather than aborting the remaining records
                continue

            href = hrefs[0].replace("file-download", "file-redirect")

            # date and version are both parsed out of the same strings
            date_version = firmware.xpath(
                ".//ul[@class='dateVersion']//strong/text()").extract()
            labels = firmware.xpath(".//a//text()").extract()

            item = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%b %d, %Y"])
            item.add_value("url", href)
            item.add_value("product", product)
            item.add_value("date", item.find_date(date_version))
            item.add_value("description", labels[0] if labels else None)
            item.add_value("version", item.find_version_period(date_version))
            item.add_value("vendor", self.name)
            yield item.load_item()
Exemplo n.º 33
0
    def parse_product(self, response):
        """Scrape the download table of a product page into firmware items.

        Rows of the table inside ``div.main_data_block`` are processed from
        the second row on. A row contributes an item only when its fifth
        column contains a link; version and build come from the second
        column, the date from the fourth, and the description is the third
        text node of the first column.

        Args:
            response: product page; ``response.meta["product"]`` must be set.

        Yields:
            Loaded ``FirmwareImage`` items.
        """
        row_query = "//div[@class='main_data_block']//table/tr[position() > 1]"

        for row in response.xpath(row_query):
            description_nodes = row.xpath("./td[1]//text()").extract()
            edition_nodes = row.xpath("./td[2]//text()").extract()
            date_nodes = row.xpath("./td[4]//text()").extract()
            download_links = row.xpath("./td[5]//a/@href").extract()

            # rows without a download link carry no firmware
            if len(download_links) == 0:
                continue

            image = FirmwareLoader(item=FirmwareImage(),
                                   response=response,
                                   date_fmt=["%Y/%m/%d"])
            version = FirmwareLoader.find_version_period(edition_nodes)
            image.add_value("version", version)
            build = FirmwareLoader.find_build(edition_nodes)
            image.add_value("build", build)
            image.add_value("url", download_links[0])
            image.add_value("date", image.find_date(date_nodes))
            image.add_value("description", description_nodes[2].strip())
            image.add_value("product", response.meta["product"])
            image.add_value("vendor", self.name)
            yield image.load_item()
Exemplo n.º 34
0
    def parse(self, response):
        """Handle both licence-agreement pages and download listing pages.

        * A page containing the ``UCagreement`` form: every quoted string in
          the ``div#productAndDoc`` markup that mentions
          ``downloads.polycom.com`` becomes an item, described by the
          version, date, description and product carried in
          ``response.meta``.
        * A page containing ``div#ContentChannel``: each list entry with a
          link is classified. Documentation and desktop-software entries are
          skipped, HTML pages are followed with this same callback (carrying
          the entry's details in ``meta``), and any other link with a path
          becomes an item.

        Args:
            response: page to scrape; for agreement pages ``response.meta``
                must hold ``version``, ``date``, ``description`` and
                ``product``.

        Yields:
            ``Request`` objects for intermediate pages and loaded
            ``FirmwareImage`` items.
        """
        ignored_words = (
            "end user license agreement", "eula", "release notes",
            "mac os", "windows", "guide", "(pdf)", "sample",
        )

        if response.xpath("//form[@name='UCagreement']"):
            markup = response.xpath("//div[@id='productAndDoc']").extract()[0]

            # attribute values are delimited by double quotes in the markup
            for href in markup.split('"'):
                if "downloads.polycom.com" not in href:
                    continue

                loader = FirmwareLoader(item=FirmwareImage(),
                                        response=response,
                                        date_fmt=["%B %d, %Y"])
                loader.add_value("version", response.meta["version"])
                loader.add_value("url", href.encode("utf-8"))
                loader.add_value("date", response.meta["date"])
                loader.add_value("description", response.meta["description"])
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()

        elif response.xpath("//div[@id='ContentChannel']"):
            for entry in response.xpath("//div[@id='ContentChannel']//li"):
                if not entry.xpath("./a"):
                    continue

                text = entry.xpath("./a//text()").extract()[0]
                href = entry.xpath("./a/@href").extract()[0].strip()
                date = entry.xpath("./span//text()").extract()

                path = urlparse(href).path
                lowered = text.lower()

                # documentation and desktop software are not firmware
                if (any(word in lowered for word in ignored_words)
                        or href.endswith(".pdf")):
                    continue

                if path.endswith((".htm", ".html")) or "(html)" in lowered:
                    # intermediate page: follow it, remembering what the
                    # listing said about this entry
                    target = urljoin(response.url,
                                     PolycomSpider.fix_url(href))
                    details = {
                        "product": response.meta.get("product", text),
                        "date": date,
                        "version": FirmwareLoader.find_version_period([text]),
                        "description": text,
                    }
                    yield Request(url=target,
                                  meta=details,
                                  headers={"Referer": response.url},
                                  callback=self.parse)

                elif path:
                    loader = FirmwareLoader(item=FirmwareImage(),
                                            response=response,
                                            date_fmt=["%B %d, %Y"])
                    loader.add_value(
                        "version",
                        FirmwareLoader.find_version_period([text]))
                    loader.add_value("url", href.encode("utf-8"))
                    loader.add_value("date", loader.find_date(date))
                    loader.add_value("description", text)
                    # NOTE: no "product" value is recorded for items found
                    # directly in a listing
                    loader.add_value("vendor", self.name)
                    yield loader.load_item()