def parse_product(self, response):
    """Fan out over the build drop-down, or emit firmware items.

    If the page contains a ``dlDropDownBox`` list and ``response.meta`` has
    no ``build`` key yet, one request per drop-down link is yielded back to
    this callback with the link text stored as ``build``. Otherwise one
    firmware item is yielded per table found under ``content_firmware``.
    """
    dropdown = "//dl[@id='dlDropDownBox']"
    meta = response.meta

    if response.xpath(dropdown) and "build" not in meta:
        for anchor in response.xpath(dropdown + "//li/a"):
            link = anchor.xpath("./@href").extract()[0]
            label = anchor.xpath(".//text()").extract()[0]
            yield Request(
                url=urlparse.urljoin(response.url, link),
                meta={"product": meta["product"], "build": label},
                headers={"Referer": response.url},
                callback=self.parse_product,
            )
        return

    # Only the first GPL link on the page is kept; it is attached to every item.
    gpl_links = response.xpath(
        "//div[@id='content_gpl_code']//a/@href").extract()
    sdk = gpl_links[0] if gpl_links else None

    for table in response.xpath("//div[@id='content_firmware']//table"):
        link = table.xpath("./tbody/tr[1]/th[1]//a/@href").extract()[0]
        label = table.xpath("./tbody/tr[1]/th[1]//a//text()").extract()[0]
        date = table.xpath("./tbody/tr[1]/td[1]//text()").extract()

        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%d/%m/%y"])
        loader.add_value("url", link)
        loader.add_value("date", loader.find_date(date))
        loader.add_value("description", label)
        loader.add_value("product", meta["product"])
        loader.add_value("build", meta.get("build"))
        loader.add_value("vendor", self.vendor)
        loader.add_value("sdk", sdk)
        yield loader.load_item()
def parse_download(self, response):
    """Yield one firmware item per record in the ``categoryBucketId-7`` list.

    The product name is the first text node of the ``prodNavHeaderBody``
    block with the " Support & Drivers" suffix removed. Date and version are
    both searched for in the same ``dateVersion`` text nodes.
    """
    records = response.xpath(
        "//li[@class='categoryBucket categoryBucketId-7']"
        "//li[@class='record ']")
    stamp_query = ".//ul[@class='dateVersion']//strong/text()"

    for record in records:
        heading = response.xpath(
            "//div[@class='prodNavHeaderBody']//text()").extract()[0]
        product = heading.replace(" Support & Drivers", "")

        date = record.xpath(stamp_query).extract()
        version = record.xpath(stamp_query).extract()

        raw_link = record.xpath(".//a/@href").extract()[0]
        link = raw_link.replace("file-download", "file-redirect")
        label = record.xpath(".//a//text()").extract()[0]

        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%b %d, %Y"])
        loader.add_value("url", link)
        loader.add_value("product", product)
        loader.add_value("date", loader.find_date(date))
        loader.add_value("description", label)
        loader.add_value("version", loader.find_version_period(version))
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_kb(self, response):
    """Yield firmware items from the ``support-article-downloads`` paragraphs.

    Each paragraph is split on ``<br><br>`` and every piece is re-parsed as
    its own small HTML document. Links containing "MIBs" update the MIB URL
    that is attached to subsequently emitted items; links containing
    "firmware" produce an item.
    """
    mib = None
    paragraphs = response.xpath(
        "//div[@id='support-article-downloads']/div/p")

    # Different firmware versions are not clearly separated in the markup, so
    # the paragraphs are segmented by hand. Both levels are walked in reverse
    # order to get the MIB before the firmware items.
    for paragraph in reversed(paragraphs):
        pieces = paragraph.extract().split("<br><br>")
        for piece in reversed(pieces):
            resp = HtmlResponse(
                url=response.url, body=piece, encoding=response.encoding)

            for link in resp.xpath("//a/@href").extract():
                if "MIBs" in link:
                    mib = link
                    continue
                if "firmware" not in link:
                    continue

                text = resp.xpath("//text()").extract()
                loader = FirmwareLoader(
                    item=FirmwareImage(), response=resp,
                    date_fmt=["%m/%d/%Y"])
                loader.add_value("date", loader.find_date(text))
                loader.add_xpath("url", "//a/@href")
                loader.add_value("mib", mib)
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                loader.add_value(
                    "version", FirmwareLoader.find_version_period(text))
                yield loader.load_item()
def parse_kb(self, response):
    """Yield firmware items for ``cache-www`` links in a rich-text article.

    The first ``sfdc_richtext`` block is split on the ``=-`` token and each
    region is re-parsed on its own. For each matching link the version is
    looked up in the text following the link first, then in the whole region.
    """
    # Initial html tokenization to find regions segmented by e.g. "======"
    # or "------".
    article = response.xpath("//div[@class='sfdc_richtext']").extract()[0]
    regions = (chunk and chunk.strip() for chunk in article.split("=-"))

    for region in regions:
        resp = HtmlResponse(
            url=response.url, body=region, encoding=response.encoding)

        for anchor in resp.xpath("//a"):
            link = anchor.xpath("@href").extract()[0]
            if "cache-www" not in link:
                continue

            text = resp.xpath("//text()").extract()
            trailing = anchor.xpath("following::text()").extract()

            loader = FirmwareLoader(
                item=FirmwareImage(),
                response=response,
                date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

            version = (FirmwareLoader.find_version_period(trailing)
                       or FirmwareLoader.find_version_period(text))

            loader.add_value("version", version)
            loader.add_value("date", loader.find_date(text))
            loader.add_value("url", link)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_kb(self, response):
    """Emit one item per "firmware" link found in the article downloads.

    Paragraphs under ``support-article-downloads`` are cut at ``<br><br>``
    and each cut is parsed as a standalone HTML snippet. The most recently
    seen link containing "MIBs" is stored on every later item as ``mib``.
    """
    mib = None

    # Need to perform some nasty segmentation because different firmware
    # versions are not clearly separated; reverse order to get the MIB before
    # the firmware items.
    blocks = response.xpath("//div[@id='support-article-downloads']/div/p")
    for block in reversed(blocks):
        for fragment in reversed(block.extract().split("<br><br>")):
            resp = HtmlResponse(url=response.url,
                                body=fragment,
                                encoding=response.encoding)
            hrefs = resp.xpath("//a/@href").extract()

            for href in hrefs:
                is_mib = "MIBs" in href
                if is_mib:
                    mib = href
                elif "firmware" in href:
                    text = resp.xpath("//text()").extract()

                    item = FirmwareLoader(item=FirmwareImage(),
                                          response=resp,
                                          date_fmt=["%m/%d/%Y"])
                    found_date = item.find_date(text)
                    item.add_value("date", found_date)
                    item.add_xpath("url", "//a/@href")
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    found_version = FirmwareLoader.find_version_period(text)
                    item.add_value("version", found_version)
                    yield item.load_item()
def parse_product(self, response):
    """Yield one firmware item per entry in the ``div_type_20`` section.

    When a version can be found in the entry's paragraph text, the
    ``div_type_30`` entries are scanned for one whose paragraph text contains
    that version; the download link of the last such entry is stored as
    ``sdk``.
    """
    entry_suffix = "/div[@id='download-os-answer-table']"
    link_query = "./table//tr[3]//a/@href"

    # types: firmware = 20, gpl source = 30, bios = 3
    for entry in response.xpath("//div[@id='div_type_20']" + entry_suffix):
        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%Y/%m/%d"])

        version = FirmwareLoader.find_version_period(
            entry.xpath("./p//text()").extract())

        # grab first download link (e.g. DLM instead of global or p2p)
        link = entry.xpath(link_query).extract()[0]

        # attempt to find matching source code entry
        gpl = None
        if version:
            sources = response.xpath("//div[@id='div_type_30']" + entry_suffix)
            for source in sources:
                heading = "".join(source.xpath("./p//text()").extract())
                if version in heading:
                    gpl = source.xpath(link_query).extract()[0]

        date_cells = entry.xpath("./table//tr[2]/td[1]//text()").extract()
        title_cells = entry.xpath("./table//tr[1]//td[1]//text()").extract()

        loader.add_value("version", version)
        loader.add_value("date", loader.find_date(date_cells))
        loader.add_value("description", " ".join(title_cells))
        loader.add_value("url", link)
        loader.add_value("sdk", gpl)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_kb(self, response):
    """Extract firmware items from an article split into ``=-`` regions.

    Only links whose href contains "cache-www" produce an item. The date is
    searched for in all text of the region; the version is taken from the
    text after the link when one is found there, else from the whole region.
    """
    date_formats = ["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"]

    # initial html tokenization to find regions segmented by e.g. "======"
    # or "------"
    raw_html = response.xpath("//div[@class='sfdc_richtext']").extract()[0]

    for piece in raw_html.split("=-"):
        body = piece.strip() if piece else piece
        resp = HtmlResponse(url=response.url,
                            body=body,
                            encoding=response.encoding)

        for link in resp.xpath("//a"):
            href = link.xpath("@href").extract()[0]
            if "cache-www" not in href:
                continue

            region_text = resp.xpath("//text()").extract()
            after_link = link.xpath("following::text()").extract()

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=date_formats)

            version = FirmwareLoader.find_version_period(after_link)
            if not version:
                version = FirmwareLoader.find_version_period(region_text)

            item.add_value("version", version)
            item.add_value("date", item.find_date(region_text))
            item.add_value("url", href)
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse_url(self, response):
    """Walk a link listing, recursing into directories and emitting images.

    Links containing ".." are ignored. Links ending in "/" are followed
    (unless their text contains "package/"), extending the product name with
    the link text minus its last character. Links ending in a known image
    suffix produce a firmware item.
    """
    image_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")
    meta = response.meta

    for anchor in response.xpath("//a"):
        text = anchor.xpath("text()").extract()[0]
        href = anchor.xpath("@href").extract()[0]

        if ".." in href:
            continue

        if href.endswith('/'):
            if "package/" in text:
                continue

            name = text[0:-1]
            if "product" in meta:
                product = "%s-%s" % (meta["product"], name)
            else:
                product = name

            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                meta={"version": meta["version"], "product": product},
                callback=self.parse_url)

        elif href.endswith(image_suffixes):
            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%d-%b-%Y"])
            loader.add_value("version", meta["version"])
            loader.add_value("url", href)
            loader.add_value("date", loader.find_date(
                anchor.xpath("following::text()").extract()))
            loader.add_value("product", meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_url(self, response):
    """Crawl a directory index and yield firmware items for image files.

    Directory links (href ending in "/") are requested again through this
    callback with an updated ``product`` in the request meta; hrefs that
    contain ".." and directories whose text contains "package/" are skipped.
    """
    extensions = [".bin", ".elf", ".fdt", ".imx", ".chk", ".trx"]

    for link in response.xpath("//a"):
        text = link.xpath("text()").extract()[0]
        href = link.xpath("@href").extract()[0]

        is_directory = href.endswith('/')
        if ".." in href:
            continue
        elif is_directory:
            if "package/" not in text:
                # Link text without its final character.
                leaf = text[0:-1]
                product = leaf
                if "product" in response.meta:
                    product = "%s-%s" % (response.meta["product"], leaf)

                next_meta = {
                    "version": response.meta["version"],
                    "product": product,
                }
                yield Request(url=urlparse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              meta=next_meta,
                              callback=self.parse_url)
        elif any(href.endswith(ext) for ext in extensions):
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%d-%b-%Y"])
            trailing_text = link.xpath("following::text()").extract()

            item.add_value("version", response.meta["version"])
            item.add_value("url", href)
            item.add_value("date", item.find_date(trailing_text))
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse(self, response):
    """Follow navigation links and emit items for firmware download links.

    Within each matched ``menu2`` cell, links whose href lacks
    ``_a=download`` are followed through this callback, with the
    space-split link text stored as ``product``. Download links whose text
    mentions "firmware" or "f/w" become firmware items, using the cell's
    full text for version and date lookup.
    """
    cells = response.xpath(
        "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]")

    for cell in cells:
        desc = cell.xpath(".//text()").extract()

        for anchor in cell.xpath(".//a"):
            href = anchor.xpath("./@href").extract()[0]
            text = anchor.xpath(".//text()").extract()[0]

            if "_a=download" not in href:
                yield Request(
                    url=urllib.parse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"product": text.strip().split(' ')},
                    callback=self.parse)
                continue

            lowered = text.lower()
            if "firmware" in lowered or "f/w" in lowered:
                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response,
                    date_fmt=["%m/%d/%Y", "%m/%d/%y"])
                loader.add_value("version", FirmwareLoader.find_version(desc))
                loader.add_value("date", loader.find_date(desc))
                loader.add_value("description", text)
                loader.add_value("url", href)
                loader.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
def parse_product(self, response):
    """Yield a single firmware item built from the first download table.

    The title cell (row 1, column 2) is stripped of non-ASCII characters and
    split into product and build: on "_" when present (product before, build
    after), otherwise on the first space (product only). The date is read
    from row 4 and the download link from row 5.

    Raises:
        IndexError: if the title cell or the download link is missing.
    """
    table = "//div[@class='download']/table[1]"

    title = response.xpath(table + "//tr[1]/td[2]//text()").extract()[0]
    date = response.xpath(table + "//tr[4]/td[2]//text()").extract()
    href = response.xpath(table + "//tr[5]/td[2]/a/@href").extract()[0]

    # Drop non-ASCII characters but keep a text string. Leaving the encoded
    # bytes in place makes the `"_" in text` checks below raise TypeError on
    # Python 3, where str and bytes cannot be mixed.
    text = title.encode("ascii", errors="ignore").decode("ascii")

    # NOTE(review): description and url are still passed UTF-8 encoded, as
    # before; confirm the loader expects bytes for these fields on Python 3.
    desc = title.encode("utf-8")

    build = None
    product = None
    if "_" in text:
        parts = text.split("_")
        product, build = parts[0], parts[1]
    elif " " in text:
        product = text.split(" ")[0]

    item = FirmwareLoader(
        item=FirmwareImage(), response=response, date_fmt=["%Y/%m/%d"])
    item.add_value("url", href.encode("utf-8"))
    item.add_value("date", item.find_date(date))
    item.add_value("description", desc)
    item.add_value("build", build)
    item.add_value("product", product)
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse(self, response):
    """Crawl a tabular directory listing and emit firmware items.

    Rows without a link in the second cell, and hrefs containing "..", are
    skipped. Directory rows are followed: the first directory level sets
    ``product`` from the link text, the next one sets ``build`` (with the
    word "build" removed). Rows whose href ends in a known image suffix
    yield a firmware item.
    """
    image_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

    for row in response.xpath("//table//tr"):
        if not row.xpath("./td[2]/a"):
            continue

        text = row.xpath("./td[2]/a/text()").extract()[0]
        href = row.xpath("./td[2]//@href").extract()[0]

        if ".." in href:
            continue

        if href.endswith("/"):
            build = response.meta.get("build", None)
            product = response.meta.get("product", None)

            if not product:
                product = text
            elif not build:
                build = text.replace("build", "")

            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                meta={"build": build, "product": product},
                callback=self.parse,
            )
        elif href.endswith(image_suffixes):
            name_parts = os.path.splitext(text)[0].split("-")

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%Y-%m-%d"])
            loader.add_value("build", response.meta["build"])
            loader.add_value("url", href)
            loader.add_value(
                "version", FirmwareLoader.find_version_period(name_parts))
            loader.add_value(
                "date",
                loader.find_date(row.xpath("./td[3]/text()").extract()))
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Build one firmware item from the first table in the download block.

    Row 1 holds the title, row 4 the date text and row 5 the download link.
    The ASCII-only title is split into ``product`` and ``build`` on "_"; when
    there is no "_" but there is a space, only ``product`` is set (the part
    before the first space).

    Raises:
        IndexError: if the title cell or the download link is missing.
    """
    def cell(row, tail):
        # XPath for column 2 of the given row of the first download table.
        return response.xpath(
            "//div[@class='download']/table[1]//tr[%d]/td[2]%s" % (row, tail))

    raw_title = cell(1, "//text()").extract()[0]
    date = cell(4, "//text()").extract()
    href = cell(5, "/a/@href").extract()[0]

    # encode() alone leaves bytes, and `"_" in <bytes>` raises TypeError on
    # Python 3; decoding again keeps the "strip non-ASCII" effect as text.
    text = raw_title.encode("ascii", errors="ignore").decode("ascii")

    # NOTE(review): url/description are still handed over UTF-8 encoded as in
    # the previous revision; verify the loader accepts bytes on Python 3.
    desc = raw_title.encode("utf-8")

    build = None
    product = None
    if "_" in text:
        pieces = text.split("_")
        build = pieces[1]
        product = pieces[0]
    elif " " in text:
        product = text.split(" ")[0]

    item = FirmwareLoader(item=FirmwareImage(),
                          response=response,
                          date_fmt=["%Y/%m/%d"])
    item.add_value("url", href.encode("utf-8"))
    item.add_value("date", item.find_date(date))
    item.add_value("description", desc)
    item.add_value("build", build)
    item.add_value("product", product)
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse_product(self, response):
    """Yield a firmware item for a product page that has a Firmware tab.

    Pages without an ``<a href="#Firmware">`` anchor produce nothing.
    Product and version are taken from ``response.meta``.

    Raises:
        IndexError: if the page has the Firmware anchor but lacks the
            product name or the firmware download link.
    """
    # <a href="#Firmware"><span>Firmware</span></a>
    if not response.xpath("//a[@href=\"#Firmware\"]").extract():
        # Stop here: a bare `yield None` would fall through to the lookups
        # below and crash on the missing nodes.
        return

    description = response.xpath(
        "//div[@class=\"product-name\"]//strong/text()").extract()[0]
    url = response.xpath(
        "//*[@id=\"content_Firmware\"]/table/tbody/tr[1]/th/a/@href"
    ).extract()[0]
    # find_date is given the list of text nodes rather than a single string.
    date = response.xpath(
        "//*[@id=\"content_Firmware\"]/table/tbody/tr[2]/td[1]/span[2]/text()"
    ).extract()

    item = FirmwareLoader(
        item=FirmwareImage(), response=response, date_fmt=["%d/%m/%y"])
    item.add_value("url", url)
    item.add_value("date", item.find_date(date))
    item.add_value("description", description)
    item.add_value("product", response.meta["product"])
    item.add_value("version", response.meta["version"])
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse_download(self, response):
    """Yield firmware items from ``downloadtable`` blocks mentioning firmware.

    A block qualifies when its joined text contains "firmware"
    (case-insensitive). The download URL is the second quote-delimited piece
    of the first link's ``onclick`` attribute, with the confirmation
    parameters appended.
    """
    confirm = "&button=Continue+with+Download&Continue=yes"

    for block in response.xpath("//div[@class='downloadtable']"):
        all_text = block.xpath(".//text()").extract()
        if "firmware" not in " ".join(all_text).lower():
            continue

        text = block.xpath(
            ".//li[@class='maindescription' and position() = 1]//text()"
        ).extract()
        date = block.xpath(
            ".//li[@class='maindescription' and position() = 2]//text()"
        ).extract()

        onclick = block.xpath(
            ".//li[@class='maindescription']//a/@onclick").extract()[0]
        href = onclick.split('\'')[1] + confirm

        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%m/%d/%Y"])
        loader.add_value("url", href)
        loader.add_value("product", response.meta["product"])
        loader.add_value("date", loader.find_date(date))
        loader.add_value("version", FirmwareLoader.find_version(text))
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Recurse through menu links and yield items for firmware downloads.

    Non-download links (href without ``_a=download``) are requested again
    with ``product`` set to the stripped link text split on single spaces.
    Download links are turned into items only when their text contains
    "firmware" or "f/w" (case-insensitive).
    """
    date_formats = ["%m/%d/%Y", "%m/%d/%y"]

    for entry in response.xpath(
            "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]"):
        description_text = entry.xpath(".//text()").extract()

        for link in entry.xpath(".//a"):
            href = link.xpath("./@href").extract()[0]
            label = link.xpath(".//text()").extract()[0]
            is_download = "_a=download" in href

            if not is_download:
                target = urlparse.urljoin(response.url, href)
                yield Request(url=target,
                              headers={"Referer": response.url},
                              meta={"product": label.strip().split(' ')},
                              callback=self.parse)
            elif "firmware" in label.lower() or "f/w" in label.lower():
                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=date_formats)
                item.add_value(
                    "version", FirmwareLoader.find_version(description_text))
                item.add_value("date", item.find_date(description_text))
                item.add_value("description", label)
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()
def parse(self, response):
    """Handle both license-agreement pages and content listing pages.

    On a page with a ``UCagreement`` form, every quote-delimited piece of the
    ``productAndDoc`` markup that contains "downloads.polycom.com" becomes an
    item populated from ``response.meta``. On a ``ContentChannel`` page, list
    entries are skipped (documentation and similar), followed (HTML pages)
    or emitted as items (anything else with a URL path).
    """
    if response.xpath("//form[@name='UCagreement']"):
        markup = response.xpath("//div[@id='productAndDoc']").extract()[0]
        for href in markup.split('"'):
            if "downloads.polycom.com" not in href:
                continue

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%B %d, %Y"])
            loader.add_value("version", response.meta["version"])
            loader.add_value("url", href.encode("utf-8"))
            loader.add_value("date", response.meta["date"])
            loader.add_value("description", response.meta["description"])
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
        return

    if not response.xpath("//div[@id='ContentChannel']"):
        return

    skipped_words = (
        "end user license agreement", "eula", "release notes", "mac os",
        "windows", "guide", "(pdf)", "sample",
    )

    for entry in response.xpath("//div[@id='ContentChannel']//li"):
        if not entry.xpath("./a"):
            continue

        text = entry.xpath("./a//text()").extract()[0]
        href = entry.xpath("./a/@href").extract()[0].strip()
        date = entry.xpath("./span//text()").extract()
        path = urlparse.urlparse(href).path
        lowered = text.lower()

        if any(word in lowered for word in skipped_words) \
                or href.endswith(".pdf"):
            continue

        if path.endswith((".htm", ".html")) or "(html)" in lowered:
            yield Request(
                url=urlparse.urljoin(
                    response.url, PolycomSpider.fix_url(href)),
                meta={"product": response.meta.get("product", text),
                      "date": date,
                      "version": FirmwareLoader.find_version_period([text]),
                      "description": text},
                headers={"Referer": response.url},
                callback=self.parse)
        elif path:
            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%B %d, %Y"])
            loader.add_value(
                "version", FirmwareLoader.find_version_period([text]))
            loader.add_value("url", href.encode("utf-8"))
            loader.add_value("date", loader.find_date(date))
            loader.add_value("description", text)
            # The product field is intentionally not set on this path:
            # item.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Yield a firmware item for every ``.npk`` or ``.lzb`` link on the page.

    Date and version are searched for in all text nodes of the page. The
    product is the file name up to (not including) its last "-".
    """
    for href in response.xpath("//a/@href").extract():
        if not href.endswith((".npk", ".lzb")):
            continue

        text = response.xpath("//text()").extract()
        filename = href.split("/")[-1]
        product = filename[0:filename.rfind("-")]

        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%Y-%b-%d"])
        loader.add_value("date", loader.find_date(text))
        loader.add_value("url", href)
        loader.add_value("product", product)
        loader.add_value("vendor", self.name)
        loader.add_value(
            "version", FirmwareLoader.find_version_period(text))
        yield loader.load_item()
def parse_product(self, response):
    """Emit one item per link whose href ends in ``.npk`` or ``.lzb``.

    The whole page text is used to look up both date and version; the
    product name is cut from the link's file name at its last "-".
    """
    package_links = (
        href for href in response.xpath("//a/@href").extract()
        if href.endswith(".npk") or href.endswith(".lzb")
    )

    for href in package_links:
        page_text = response.xpath("//text()").extract()
        basename = href.split("/")[-1]
        cut = basename.rfind("-")

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%Y-%b-%d"])
        item.add_value("date", item.find_date(page_text))
        item.add_value("url", href)
        item.add_value("product", basename[0:cut])
        item.add_value("vendor", self.name)
        item.add_value("version",
                       FirmwareLoader.find_version_period(page_text))
        yield item.load_item()
def parse(self, response):
    """Crawl a directory index and emit items for ``.gz`` images.

    Hrefs containing ".." are skipped and hrefs ending in "/" are followed
    through this callback. For hrefs ending in ".gz" (and not containing
    ".iso") the link text is stripped of its extensions, split on "-", and
    turned into a product/version pair: BETA and RC tokens are appended to
    the version, RELEASE tokens are dropped, and what remains is the product.
    """
    for link in response.xpath("//a"):
        text = link.xpath(".//text()").extract()[0]
        href = link.xpath(".//@href").extract()[0]

        if ".." in href:
            continue
        elif href.endswith('/'):
            yield Request(url=urlparse.urljoin(response.url, href),
                          headers={"Referer": response.url},
                          callback=self.parse)
        elif href.endswith(".gz") and ".iso" not in href:
            # strip off multiple file extensions
            basename = os.path.splitext(text)[0]
            while ".img" in basename or ".iso" in basename:
                stripped = os.path.splitext(basename)[0]
                if stripped == basename:
                    # Nothing left to strip (e.g. a name starting with
                    # ".img"); stop instead of looping forever.
                    break
                basename = stripped

            tokens = basename.split("-")
            version = FirmwareLoader.find_version_period(tokens)

            # attempt to parse filename and generate product/version strings
            remove = [version] if version else []
            suffixes = []
            for i, token in enumerate(tokens):
                if "BETA" in token:
                    # The token after BETA is glued onto it, when present.
                    has_next = i + 1 < len(tokens)
                    following = tokens[i + 1] if has_next else ""
                    suffixes.append("%s%s" % (token, following))
                    remove.append(token)
                    if has_next:
                        remove.append(following)
                elif "RC" in token:
                    suffixes.append(token)
                    remove.append(token)
                elif "RELEASE" in token:
                    remove.append(token)

            if suffixes:
                # Joining tolerates a missing base version, which previously
                # raised TypeError on `None += str`.
                version = "-".join(([version] if version else []) + suffixes)

            product_tokens = [x for x in tokens if x not in remove]

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%d-%b-%Y"])
            item.add_value("version", version)
            item.add_value("url", href)
            item.add_value(
                "date",
                item.find_date(link.xpath("following::text()").extract()))
            item.add_value("product", "-".join(product_tokens))
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse(self, response):
    """Recurse through an index page and yield items for ``.gz`` downloads.

    Directory links are followed; links with ".." in the href are ignored.
    For ``.gz`` links whose href has no ".iso", the link text is reduced to
    its dash-separated stem. The version found in the stem is extended with
    any BETA (plus the token after it) and RC markers; RELEASE markers and
    all version tokens are removed from the product name.
    """
    for link in response.xpath("//a"):
        text = link.xpath(".//text()").extract()[0]
        href = link.xpath(".//@href").extract()[0]

        if ".." in href:
            continue

        if href.endswith('/'):
            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                callback=self.parse)
            continue

        if not href.endswith(".gz") or ".iso" in href:
            continue

        # strip off multiple file extensions
        stem = os.path.splitext(text)[0]
        while ".img" in stem or ".iso" in stem:
            shorter = os.path.splitext(stem)[0]
            if shorter == stem:
                # splitext removed nothing; bail out rather than spin.
                break
            stem = shorter

        parts = stem.split("-")
        base_version = FirmwareLoader.find_version_period(parts)

        # attempt to parse filename and generate product/version strings
        version_parts = [base_version] if base_version else []
        remove = list(version_parts)
        tagged = False
        for index, part in enumerate(parts):
            if "BETA" in part:
                # Guard the look-ahead: BETA may be the final token.
                nxt = parts[index + 1] if index + 1 < len(parts) else None
                version_parts.append(part + (nxt or ""))
                remove.append(part)
                if nxt is not None:
                    remove.append(nxt)
                tagged = True
            elif "RC" in part:
                version_parts.append(part)
                remove.append(part)
                tagged = True
            elif "RELEASE" in part:
                remove.append(part)

        # Without a BETA/RC marker the version is passed through untouched
        # (possibly None); with one, a missing base version no longer raises
        # TypeError.
        version = "-".join(version_parts) if tagged else base_version

        product = "-".join(x for x in parts if x not in remove)

        item = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
        item.add_value("version", version)
        item.add_value("url", href)
        item.add_value("date", item.find_date(
            link.xpath("following::text()").extract()))
        item.add_value("product", product)
        item.add_value("vendor", self.name)
        yield item.load_item()
def parse(self, response):
    """Walk a table-based file listing, yielding requests and firmware items.

    For directory rows, ``product`` is filled from the link text if it is
    not set yet; otherwise ``build`` is filled (text with "build" removed)
    if it is not set yet. File rows with a recognised image suffix become
    items whose version is looked up in the dash-separated file stem.
    """
    suffixes = [".bin", ".elf", ".fdt", ".imx", ".chk", ".trx"]

    rows_with_links = (
        row for row in response.xpath("//table//tr")
        if row.xpath("./td[2]/a")
    )

    for row in rows_with_links:
        label = row.xpath("./td[2]/a/text()").extract()[0]
        href = row.xpath("./td[2]//@href").extract()[0]

        if ".." in href:
            continue
        elif href.endswith('/'):
            build = response.meta.get("build", None)
            product = response.meta.get("product", None)

            if not product:
                product = label
            elif not build:
                build = label.replace("build", "")

            next_meta = {"build": build, "product": product}
            yield Request(url=urlparse.urljoin(response.url, href),
                          headers={"Referer": response.url},
                          meta=next_meta,
                          callback=self.parse)
        elif any(href.endswith(suffix) for suffix in suffixes):
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y-%m-%d"])
            item.add_value("build", response.meta["build"])
            item.add_value("url", href)

            stem_tokens = os.path.splitext(label)[0].split("-")
            item.add_value("version",
                           FirmwareLoader.find_version_period(stem_tokens))

            date_text = row.xpath("./td[3]/text()").extract()
            item.add_value("date", item.find_date(date_text))
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse_product(self, response):
    """Yield the first-row firmware entry of each "firmware" tab.

    Tabs are counted from the links under ``normaltab2``; for each position
    whose tab label contains "firmware" (case-insensitive), the first row of
    every table in the matching ``normalcon2`` panel is turned into an item.
    """
    tab_count = len(response.xpath("//ul[@id='normaltab2']//a"))

    for position in range(1, tab_count + 1):
        label = "".join(response.xpath(
            "//ul[@id='normaltab2']/li[%d]/a//text()" % position).extract())
        if "firmware" not in label.lower():
            continue

        rows = response.xpath(
            "//div[@id='normalcon2']/div[%d]//table/tr[1]" % position)
        for row in rows:
            version = row.xpath("./td[2]//text()").extract()
            date = row.xpath("./td[4]//text()").extract()
            href = row.xpath("./td[5]//a/@href").extract()[0]

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%Y-%m-%d"])
            loader.add_value(
                "version", FirmwareLoader.find_version_period(version))
            loader.add_value("url", href)
            loader.add_value("date", loader.find_date(date))
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.vendor)
            yield loader.load_item()
def parse_product(self, response):
    """Request each build variant once, then scrape its firmware tables.

    The drop-down branch is only taken while ``build`` is absent from
    ``response.meta``; the follow-up requests carry ``build`` so the same
    page type is scraped for firmware tables on the second visit. The first
    link under ``content_gpl_code`` is stored as ``sdk`` on every item.
    """
    has_dropdown = response.xpath("//dl[@id='dlDropDownBox']")
    build_known = "build" in response.meta

    if has_dropdown and not build_known:
        for option in response.xpath("//dl[@id='dlDropDownBox']//li/a"):
            href = option.xpath("./@href").extract()[0]
            text = option.xpath(".//text()").extract()[0]
            next_meta = {
                "product": response.meta["product"],
                "build": text,
            }
            yield Request(url=urlparse.urljoin(response.url, href),
                          meta=next_meta,
                          headers={"Referer": response.url},
                          callback=self.parse_product)
    else:
        # Equivalent to walking the links in reverse and keeping the last
        # assignment: the first link wins, None when there are none.
        sdk = next(
            iter(response.xpath(
                "//div[@id='content_gpl_code']//a/@href").extract()),
            None)

        build = response.meta["build"] if build_known else None

        for entry in response.xpath("//div[@id='content_firmware']//table"):
            header_link = "./tbody/tr[1]/th[1]//a"
            href = entry.xpath(header_link + "/@href").extract()[0]
            text = entry.xpath(header_link + "//text()").extract()[0]
            date = entry.xpath("./tbody/tr[1]/td[1]//text()").extract()

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%d/%m/%y"])
            item.add_value("url", href)
            item.add_value("date", item.find_date(date))
            item.add_value("description", text)
            item.add_value("product", response.meta["product"])
            item.add_value("build", build)
            item.add_value("vendor", self.vendor)
            item.add_value("sdk", sdk)
            yield item.load_item()
def parse(self, response):
    """Yield a firmware item for each ``.npk``/``.lzb`` link on the page.

    Protocol-relative hrefs ("//host/...") are prefixed with "http:". The
    version is the second-to-last path segment of the URL and the product is
    the file name up to its last "-". The date is searched for in the whole
    page text.
    """
    for href in response.xpath("//a/@href").extract():
        if not href.endswith((".npk", ".lzb")):
            continue

        if href.startswith("//"):
            href = "http:" + href

        text = response.xpath("//text()").extract()

        segments = href.split('/')
        version, filename = segments[-2], segments[-1]

        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%Y-%b-%d"])
        loader.add_value("date", loader.find_date(text))
        loader.add_value("url", href)
        loader.add_value("product", filename[0:filename.rfind("-")])
        loader.add_value("vendor", self.name)
        loader.add_value("version", version)
        yield loader.load_item()
def parse_product(self, response):
    """Emit the firmware item of a product page, if it offers firmware.

    Nothing is yielded when the page has no ``<a href="#Firmware">`` anchor.
    Otherwise the first row of the ``content_Firmware`` table supplies the
    download URL and the second row the date text; ``product`` and
    ``version`` come from ``response.meta``.

    Raises:
        IndexError: if the Firmware anchor exists but the product name or
            the download link cannot be found.
    """
    firmware_table = "//*[@id=\"content_Firmware\"]/table/tbody"

    # <a href="#Firmware"><span>Firmware</span></a>
    has_firmware = response.xpath("//a[@href=\"#Firmware\"]").extract()
    if not has_firmware:
        # Returning ends the generator; yielding None here would continue
        # into the lookups below and fail with IndexError.
        return

    description = response.xpath(
        "//div[@class=\"product-name\"]//strong/text()").extract()[0]
    url = response.xpath(firmware_table + "/tr[1]/th/a/@href").extract()[0]
    # Keep the full list of text nodes: find_date receives a list here,
    # not one string.
    date = response.xpath(
        firmware_table + "/tr[2]/td[1]/span[2]/text()").extract()

    item = FirmwareLoader(item=FirmwareImage(),
                          response=response,
                          date_fmt=["%d/%m/%y"])
    item.add_value("url", url)
    item.add_value("date", item.find_date(date))
    item.add_value("description", description)
    item.add_value("product", response.meta["product"])
    item.add_value("version", response.meta["version"])
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse_product(self, response):
    """Yield one firmware item per data row that has a download link.

    Rows are taken from the table in ``main_data_block``, skipping the first
    row. Version and build are both looked up in the second cell; the
    description is the third text node of the first cell, stripped.
    """
    rows = response.xpath(
        "//div[@class='main_data_block']//table/tr[position() > 1]")

    for row in rows:
        text = row.xpath("./td[1]//text()").extract()
        edition = row.xpath("./td[2]//text()").extract()
        date = row.xpath("./td[4]//text()").extract()
        hrefs = row.xpath("./td[5]//a/@href").extract()

        if not hrefs:
            continue

        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%Y/%m/%d"])
        loader.add_value(
            "version", FirmwareLoader.find_version_period(edition))
        loader.add_value("build", FirmwareLoader.find_build(edition))
        loader.add_value("url", hrefs[0])
        loader.add_value("date", loader.find_date(date))
        loader.add_value("description", text[2].strip())
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_download(self, response):
    """Emit items for download blocks whose text mentions firmware.

    The version is looked up in the first ``maindescription`` entry and the
    date in the second. The URL comes from the ``onclick`` handler of the
    first link: the text between its first pair of single quotes, followed
    by the download-confirmation query parameters.
    """
    def description(position):
        # Text nodes of the maindescription <li> at the given position.
        return entry.xpath(
            ".//li[@class='maindescription' and position() = %d]//text()"
            % position).extract()

    for entry in response.xpath("//div[@class='downloadtable']"):
        combined = " ".join(entry.xpath(".//text()").extract())
        if "firmware" in combined.lower():
            text = description(1)
            date = description(2)

            handlers = entry.xpath(
                ".//li[@class='maindescription']//a/@onclick").extract()
            quoted = handlers[0].split('\'')
            href = quoted[1] + "&button=Continue+with+Download&Continue=yes"

            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%m/%d/%Y"])
            item.add_value("url", href)
            item.add_value("product", response.meta["product"])
            item.add_value("date", item.find_date(date))
            item.add_value("version", FirmwareLoader.find_version(text))
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse_download(self, response):
    """Turn each record of the category-7 bucket into a firmware item.

    The download href has "file-download" replaced by "file-redirect". The
    product comes from the page header, minus " Support & Drivers"; the
    record's ``dateVersion`` strings feed both the date and version lookup.
    """
    record_query = ("//li[@class='categoryBucket categoryBucketId-7']"
                    "//li[@class='record ']")

    for firmware in response.xpath(record_query):
        header_nodes = response.xpath(
            "//div[@class='prodNavHeaderBody']//text()").extract()
        product = header_nodes[0].replace(" Support & Drivers", "")

        date = firmware.xpath(
            ".//ul[@class='dateVersion']//strong/text()").extract()
        version = firmware.xpath(
            ".//ul[@class='dateVersion']//strong/text()").extract()

        href = firmware.xpath(".//a/@href").extract()[0]
        href = href.replace("file-download", "file-redirect")
        text = firmware.xpath(".//a//text()").extract()[0]

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%b %d, %Y"])
        fields = (
            ("url", href),
            ("product", product),
            ("date", item.find_date(date)),
            ("description", text),
            ("version", item.find_version_period(version)),
            ("vendor", self.name),
        )
        for key, value in fields:
            item.add_value(key, value)
        yield item.load_item()
def parse_product(self, response):
    """Scrape the product's download table into firmware items.

    Every table row after the first is inspected; rows without a link in the
    fifth cell are ignored. The first link of a row is used as the URL.
    """
    for row in response.xpath(
            "//div[@class='main_data_block']//table/tr[position() > 1]"):
        # Cells: 1 = description text, 2 = edition, 4 = date, 5 = links.
        name_cell, edition, date, links = (
            row.xpath(query).extract()
            for query in ("./td[1]//text()", "./td[2]//text()",
                          "./td[4]//text()", "./td[5]//a/@href")
        )

        if links:
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%Y/%m/%d"])
            item.add_value("version",
                           FirmwareLoader.find_version_period(edition))
            item.add_value("build", FirmwareLoader.find_build(edition))
            item.add_value("url", links[0])
            item.add_value("date", item.find_date(date))
            item.add_value("description", name_cell[2].strip())
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse(self, response):
    """Scrape agreement pages for download URLs and listings for entries.

    Agreement pages (those with a ``UCagreement`` form) yield one item per
    "downloads.polycom.com" URL found between double quotes in the
    ``productAndDoc`` markup, with all other fields copied from
    ``response.meta``. Listing pages (``ContentChannel``) skip
    documentation-like entries and PDFs, follow HTML pages with the entry's
    details stored in the request meta, and emit items for remaining entries
    that have a URL path.
    """
    excluded = [
        "end user license agreement", "eula", "release notes", "mac os",
        "windows", "guide", "(pdf)", "sample"
    ]

    if response.xpath("//form[@name='UCagreement']"):
        quoted = response.xpath(
            "//div[@id='productAndDoc']").extract()[0].split('"')
        download_urls = [s for s in quoted if "downloads.polycom.com" in s]

        for href in download_urls:
            item = FirmwareLoader(item=FirmwareImage(),
                                  response=response,
                                  date_fmt=["%B %d, %Y"])
            item.add_value("version", response.meta["version"])
            item.add_value("url", href.encode("utf-8"))
            item.add_value("date", response.meta["date"])
            item.add_value("description", response.meta["description"])
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()

    elif response.xpath("//div[@id='ContentChannel']"):
        for entry in response.xpath("//div[@id='ContentChannel']//li"):
            if not entry.xpath("./a"):
                continue

            text = entry.xpath("./a//text()").extract()[0]
            href = entry.xpath("./a/@href").extract()[0].strip()
            date = entry.xpath("./span//text()").extract()
            path = urlparse(href).path

            is_excluded = (any(word in text.lower() for word in excluded)
                           or href.endswith(".pdf"))
            if is_excluded:
                continue

            is_html = (any(path.endswith(ext) for ext in [".htm", ".html"])
                       or "(html)" in text.lower())
            if is_html:
                if "product" in response.meta:
                    product = response.meta["product"]
                else:
                    product = text
                next_meta = {
                    "product": product,
                    "date": date,
                    "version": FirmwareLoader.find_version_period([text]),
                    "description": text,
                }
                yield Request(url=urljoin(response.url,
                                          PolycomSpider.fix_url(href)),
                              meta=next_meta,
                              headers={"Referer": response.url},
                              callback=self.parse)
            elif path:
                item = FirmwareLoader(item=FirmwareImage(),
                                      response=response,
                                      date_fmt=["%B %d, %Y"])
                item.add_value("version",
                               FirmwareLoader.find_version_period([text]))
                item.add_value("url", href.encode("utf-8"))
                item.add_value("date", item.find_date(date))
                item.add_value("description", text)
                # No product is recorded for these entries:
                # item.add_value("product", response.meta["product"])
                item.add_value("vendor", self.name)
                yield item.load_item()