def parse(self, response):
    """Yield one firmware item per data row of the download table.

    Two page layouts are handled. When the table carrying the
    ``..._Table_REC`` id is present, its rows are used (revision in
    column 3, link in column 4). Otherwise the rows of the five-deep nested
    table are used (link in column 2, revision in column 4). In both cases
    only rows after the first are considered.
    """
    rec_table = "//table[@id='ctl00_ctl00_ContentPlaceHolderMain_ContentPlaceHolderSupportMiddle_Table_REC']"

    # Each layout lists its cells in the order they are read from the row.
    if response.xpath(rec_table):
        rows_query = rec_table + "/tr[position() > 1]"
        columns = (
            ("product", ".//td[1]//text()"),
            ("version", ".//td[3]//text()"),
            ("url", ".//td[4]//a/@href"),
        )
    else:
        rows_query = "//table//table//table//table//table//tr[position() > 1]"
        columns = (
            ("product", ".//td[1]//text()"),
            ("url", ".//td[2]//a/@href"),
            ("version", ".//td[4]//text()"),
        )

    for row in response.xpath(rows_query):
        cell = {}
        for field, query in columns:
            cell[field] = row.xpath(query).extract()[0]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("version", cell["version"])
        loader.add_value("url", SupermicroSpider.fix_url(cell["url"]))
        loader.add_value("product", cell["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_product(self, response):
    """Yield firmware items found in the rows of a product download table.

    Rows are walked in reverse document order; a "mib" row encountered along
    the way sets the MIB link that is attached to every firmware item
    yielded afterwards. Rows containing a version ``<select>`` describe
    several releases at once and are expanded option by option.

    Nothing is yielded when the response body is empty.
    """
    if not response.body:
        return

    mib = None
    for entry in reversed(response.xpath("//table/tbody/tr")):
        if entry.xpath("./td[contains(@class, 'versionTd')]/select"):
            # Run each query once per row instead of once per option index.
            option_count = len(entry.xpath(
                "./td[contains(@class, 'versionTd')]/select/option"))
            descs = entry.xpath(
                "./td[contains(@class, 'typeTd')]/span/text()").extract()
            dates = entry.xpath(
                "./td[contains(@class, 'dateTd')]/span/text()").extract()
            versions = entry.xpath(
                "./td[contains(@class, 'versionTd')]/select/option/text()").extract()
            links = entry.xpath(
                "./td[contains(@class, 'downloadTd')]/div/a[1]/@data-filelink").extract()

            for i in range(option_count):
                if "firmware" not in descs[i].lower():
                    continue

                date = dates[i]
                ver = versions[i]
                href = links[i]

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
                item.add_value("version", ver)
                item.add_value("date", date)
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("mib", mib)
                item.add_value("vendor", self.name)
                yield item.load_item()
        else:
            # The second text node of the type cell carries the description.
            desc = entry.xpath(
                "./td[contains(@class, 'typeTd')]//text()").extract()[1].lower()

            if "firmware" in desc:
                date = entry.xpath(
                    "./td[contains(@class, 'dateTd')]//text()").extract()
                ver = entry.xpath(
                    "./td[contains(@class, 'versionTd')]//text()").extract()
                href = entry.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@data-filelink").extract()[0]

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
                item.add_value("version", ver)
                item.add_value("date", date)
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("mib", mib)
                item.add_value("vendor", self.name)
                yield item.load_item()
            elif "mib" in desc:
                mib = entry.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@href").extract()[0]
def parse_product(self, response):
    """Yield firmware items found in the rows of a product download table.

    Rows are walked in reverse document order; a "mib" row encountered along
    the way sets the MIB link that is attached to every firmware item
    yielded afterwards. Rows containing a version ``<select>`` describe
    several releases at once and are expanded option by option.

    Nothing is yielded when the response body is empty.
    """
    if not response.body:
        return

    mib = None
    for entry in reversed(response.xpath("//table/tbody/tr")):
        if entry.xpath("./td[contains(@class, 'versionTd')]/select"):
            # Run each query once per row instead of once per option index.
            option_count = len(entry.xpath(
                "./td[contains(@class, 'versionTd')]/select/option"))
            descs = entry.xpath(
                "./td[contains(@class, 'typeTd')]/span/text()").extract()
            dates = entry.xpath(
                "./td[contains(@class, 'dateTd')]/span/text()").extract()
            versions = entry.xpath(
                "./td[contains(@class, 'versionTd')]/select/option/text()").extract()
            links = entry.xpath(
                "./td[contains(@class, 'downloadTd')]/div/a[1]/@data-filelink").extract()

            for i in range(option_count):
                if "firmware" not in descs[i].lower():
                    continue

                date = dates[i]
                ver = versions[i]
                href = links[i]

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
                item.add_value("version", ver)
                item.add_value("date", date)
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("mib", mib)
                item.add_value("vendor", self.name)
                yield item.load_item()
        else:
            # The second text node of the type cell carries the description.
            desc = entry.xpath(
                "./td[contains(@class, 'typeTd')]//text()").extract()[1].lower()

            if "firmware" in desc:
                date = entry.xpath(
                    "./td[contains(@class, 'dateTd')]//text()").extract()
                ver = entry.xpath(
                    "./td[contains(@class, 'versionTd')]//text()").extract()
                href = entry.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@data-filelink").extract()[0]

                item = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%m-%d-%Y"])
                item.add_value("version", ver)
                item.add_value("date", date)
                item.add_value("url", href)
                item.add_value("product", response.meta["product"])
                item.add_value("mib", mib)
                item.add_value("vendor", self.name)
                yield item.load_item()
            elif "mib" in desc:
                mib = entry.xpath(
                    "./td[contains(@class, 'downloadTd')]//a/@href").extract()[0]
def parse_json(self, response):
    """Handle every JSON reply of the download API.

    Depending on what the reply and ``response.meta`` contain, this:

    * schedules one follow-up request per entry of ``products``;
    * emits a firmware item completed with the ``download_url`` of the
      reply when ``meta`` already carries a ``url``;
    * otherwise, when ``meta`` carries a ``product``, walks ``downloads``
      and, for firmware entries, either requests the SDK details (when
      ``sdk__id`` is set) or emits the item directly.
    """
    payload = json.loads(response.body_as_unicode())
    meta = response.meta

    def follow(query, request_meta):
        # All follow-up calls hit the same endpoint with the same headers.
        return Request(
            url=urlparse.urljoin(response.url, query),
            headers={"Referer": response.url,
                     "X-Requested-With": "XMLHttpRequest"},
            meta=request_meta,
            callback=self.parse_json,
        )

    if "products" in payload:
        for product in payload["products"]:
            slug = product["slug"]
            yield follow("?product=%s" % (slug), {"product": slug})

    if "url" in meta:
        loader = FirmwareLoader(item=FirmwareImage(), response=response,
                                date_fmt=["%Y-%m-%d"])
        for field in ("url", "product", "date", "description", "build", "version"):
            loader.add_value(field, meta[field])
        loader.add_value("sdk", payload["download_url"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
        return

    if "product" not in meta:
        return

    for entry in payload["downloads"]:
        if entry["category__slug"] != "firmware":
            continue

        if entry["sdk__id"]:
            yield follow(
                "?gpl=%s&eula=True" % (entry["sdk__id"]),
                {
                    "product": meta["product"],
                    "date": entry["date_published"],
                    "build": entry["build"],
                    "url": entry["file_path"],
                    "version": entry["version"],
                    "description": entry["name"],
                },
            )
            continue

        loader = FirmwareLoader(item=FirmwareImage(), response=response,
                                date_fmt=["%Y-%m-%d"])
        loader.add_value("url", entry["file_path"])
        loader.add_value("product", meta["product"])
        loader.add_value("date", entry["date_published"])
        loader.add_value("description", entry["name"])
        loader.add_value("build", entry["build"])
        loader.add_value("version", entry["version"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_product(self, response):
    """Yield firmware and GPL source items from a product download page.

    The second-to-last URL segment decides how the hardware version is
    found. If it looks like a revision (``v`` followed by a digit) the page
    is already revision-specific and that segment is the version. Otherwise
    the revision selector on the page is read: the first link gives the
    version of the current page and every other link is followed with this
    same callback.

    Reads ``product`` and ``category`` from ``response.meta``.
    """
    self.logger.debug("Parsing %s...", response.url)

    revision = response.url.split('/')[-2]
    version = ""
    # Require a digit after the "v" so product slugs that merely start with
    # "v" are not mistaken for a hardware revision.
    # NOTE(review): assumes revisions look like "v2"/"v1.1" — confirm.
    if revision[:1] == 'v' and revision[1:2].isdigit():
        version = revision
    else:
        links = response.css(
            "div.hardware-version dl.select-version li a::attr(href)"
        ).extract()
        if links:
            version = links[0].split('/')[-2]
            for link in links[1:]:
                yield response.follow(link, meta=response.meta,
                                      callback=self.parse_product)

    firmwares = response.css("#content_Firmware > table")
    self.logger.debug("%s %s: %d binary firmware found.",
                      response.meta["product"], version, len(firmwares))

    for firmware in firmwares:
        # Values are read from the spans at odd positions (1, 3, 5).
        spans = firmware.css('tr.detail-info span')

        item = FirmwareLoader(item=FirmwareImage(), response=response,
                              date_fmt=["%y-%m-%d"])
        item.add_value("vendor", self.vendor)
        item.add_value("url", firmware.css("a::attr(href)").get())
        item.add_value("date", spans[1].css("::text").get().strip())
        item.add_value("language", spans[3].css("::text").get().strip())
        item.add_value("size", spans[5].css("::text").get().strip())
        item.add_value("description",
                       "\n".join(firmware.css('td.more p').getall()))
        item.add_value("product", response.meta["product"])
        item.add_value("category", response.meta["category"])
        item.add_value("version", version)
        yield item.load_item()

    gpl_source_codes = response.css("#content_GPL-Code a")
    self.logger.debug("%s %s: %d gpl source code found.",
                      response.meta["product"], version, len(gpl_source_codes))

    for gpl in gpl_source_codes:
        item = FirmwareLoader(item=FirmwareImage(), response=response,
                              date_fmt=["%d/%m/%y"])
        item.add_value("vendor", self.vendor)
        item.add_value("url", gpl.css("a::attr(href)").get())
        item.add_value("product", response.meta["product"])
        item.add_value("category", response.meta["category"])
        item.add_value("version", version)
        yield item.load_item()
def parse(self, response):
    """Parse either a licence-agreement page or a download listing page.

    * Agreement page (``UCagreement`` form present): every quoted string in
      the ``productAndDoc`` block that mentions ``downloads.polycom.com`` is
      emitted as a firmware URL, described by the values carried in
      ``response.meta``.
    * Listing page (``ContentChannel`` block present): each list entry with
      a link is skipped (documentation, EULA, desktop software, PDFs),
      followed (HTML pages), or emitted as a firmware item.
    """
    if response.xpath("//form[@name='UCagreement']"):
        block = response.xpath("//div[@id='productAndDoc']").extract()[0]
        # Splitting on double quotes isolates attribute values such as hrefs.
        for href in block.split('"'):
            if "downloads.polycom.com" not in href:
                continue

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%B %d, %Y"])
            loader.add_value("version", response.meta["version"])
            loader.add_value("url", href.encode("utf-8"))
            loader.add_value("date", response.meta["date"])
            loader.add_value("description", response.meta["description"])
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()

    elif response.xpath("//div[@id='ContentChannel']"):
        ignored_keywords = ["end user license agreement", "eula",
                            "release notes", "mac os", "windows", "guide",
                            "(pdf)", "sample"]

        for entry in response.xpath("//div[@id='ContentChannel']//li"):
            if not entry.xpath("./a"):
                continue

            text = entry.xpath("./a//text()").extract()[0]
            href = entry.xpath("./a/@href").extract()[0].strip()
            date = entry.xpath("./span//text()").extract()
            path = urlparse.urlparse(href).path
            lowered = text.lower()

            if any(word in lowered for word in ignored_keywords) or href.endswith(".pdf"):
                continue

            if any(path.endswith(ext) for ext in [".htm", ".html"]) or "(html)" in lowered:
                if "product" in response.meta:
                    product = response.meta["product"]
                else:
                    product = text
                yield Request(
                    url=urlparse.urljoin(
                        response.url, PolycomSpider.fix_url(href)),
                    meta={"product": product,
                          "date": date,
                          "version": FirmwareLoader.find_version_period([text]),
                          "description": text},
                    headers={"Referer": response.url},
                    callback=self.parse)
            elif path:
                loader = FirmwareLoader(
                    item=FirmwareImage(), response=response, date_fmt=["%B %d, %Y"])
                loader.add_value(
                    "version", FirmwareLoader.find_version_period([text]))
                loader.add_value("url", href.encode("utf-8"))
                loader.add_value("date", loader.find_date(date))
                loader.add_value("description", text)
                # The product field is deliberately left unset here:
                # item.add_value("product", response.meta["product"])
                loader.add_value("vendor", self.name)
                yield loader.load_item()
def parse_json(self, response):
    """Handle every JSON reply of the download API.

    Depending on what the reply and ``response.meta`` contain, this:

    * schedules one follow-up request per entry of ``products``;
    * emits a firmware item completed with the ``download_url`` of the
      reply when ``meta`` already carries a ``url``;
    * otherwise, when ``meta`` carries a ``product``, walks ``downloads``
      and, for firmware entries, either requests the SDK details (when
      ``sdk__id`` is set) or emits the item directly.
    """
    payload = json.loads(response.body_as_unicode())
    meta = response.meta

    def follow(query, request_meta):
        # All follow-up calls hit the same endpoint with the same headers.
        return Request(
            url=urlparse.urljoin(response.url, query),
            headers={"Referer": response.url,
                     "X-Requested-With": "XMLHttpRequest"},
            meta=request_meta,
            callback=self.parse_json)

    if "products" in payload:
        for product in payload["products"]:
            slug = product["slug"]
            yield follow("?product=%s" % (slug), {"product": slug})

    if "url" in meta:
        loader = FirmwareLoader(item=FirmwareImage(), response=response,
                                date_fmt=["%Y-%m-%d"])
        for field in ("url", "product", "date", "description", "build", "version"):
            loader.add_value(field, meta[field])
        loader.add_value("sdk", payload["download_url"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
        return

    if "product" not in meta:
        return

    for entry in payload["downloads"]:
        if entry["category__slug"] != "firmware":
            continue

        if entry["sdk__id"]:
            yield follow(
                "?gpl=%s&eula=True" % (entry["sdk__id"]),
                {"product": meta["product"],
                 "date": entry["date_published"],
                 "build": entry["build"],
                 "url": entry["file_path"],
                 "version": entry["version"],
                 "description": entry["name"]})
            continue

        loader = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%Y-%m-%d"])
        loader.add_value("url", entry["file_path"])
        loader.add_value("product", meta["product"])
        loader.add_value("date", entry["date_published"])
        loader.add_value("description", entry["name"])
        loader.add_value("build", entry["build"])
        loader.add_value("version", entry["version"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_url(self, response):
    """Recursively walk a directory listing and emit firmware files.

    Links containing ``..`` are ignored. Directory links (ending in ``/``)
    are followed unless their text contains ``package/``; the directory name
    is appended to the product carried in ``response.meta``. Links ending in
    a known firmware extension become items.
    """
    firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

    for link in response.xpath("//a"):
        text = link.xpath("text()").extract()[0]
        href = link.xpath("@href").extract()[0]

        if ".." in href:
            continue

        if href.endswith('/'):
            if "package/" in text:
                continue

            # Drop the trailing character of the link text.
            folder = text[0: -1]
            if "product" in response.meta:
                product = "%s-%s" % (response.meta["product"], folder)
            else:
                product = folder

            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                meta={"version": response.meta["version"], "product": product},
                callback=self.parse_url)

        elif href.endswith(firmware_suffixes):
            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
            loader.add_value("version", response.meta["version"])
            loader.add_value("url", href)
            # The date is searched for in the text nodes following the link.
            trailing_text = link.xpath("following::text()").extract()
            loader.add_value("date", loader.find_date(trailing_text))
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_download(self, response):
    """Yield one firmware item per record of download category 7.

    The product name comes from the page header (with the trailing
    " Support & Drivers" removed). Date and version are both searched for in
    the same ``dateVersion`` text nodes of each record.
    """
    records = response.xpath(
        "//li[@class='categoryBucket categoryBucketId-7']//li[@class='record ']")
    if not records:
        return

    # The header is page-wide, so read it once instead of once per record.
    product = response.xpath(
        "//div[@class='prodNavHeaderBody']//text()").extract()[0].replace(
            " Support & Drivers", "")

    for firmware in records:
        # Date and version live in the same nodes; query them a single time.
        date_version = firmware.xpath(
            ".//ul[@class='dateVersion']//strong/text()").extract()
        href = firmware.xpath(".//a/@href").extract()[0].replace(
            "file-download", "file-redirect")
        text = firmware.xpath(".//a//text()").extract()[0]

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%b %d, %Y"])
        item.add_value("url", href)
        item.add_value("product", product)
        item.add_value("date", item.find_date(date_version))
        item.add_value("description", text)
        item.add_value("version", item.find_version_period(date_version))
        item.add_value("vendor", self.name)
        yield item.load_item()
def parse_kb(self, response):
    """Yield firmware items from the downloads section of a support article.

    Reads ``product`` from ``response.meta``. A link whose href contains
    "MIBs" updates the MIB attached to all firmware items yielded after it.
    """
    mib = None

    # need to perform some nasty segmentation because different firmware
    # versions are not clearly separated
    # reverse order to get MIB before firmware items
    for entry in reversed(
            response.xpath("//div[@id='support-article-downloads']/div/p")):
        for segment in reversed(entry.extract().split("<br><br>")):
            resp = HtmlResponse(url=response.url,
                                body=segment,
                                encoding=response.encoding)

            hrefs = resp.xpath("//a/@href").extract()
            if not hrefs:
                continue

            # The segment text is the same for every link in it: read once.
            text = resp.xpath("//text()").extract()

            for href in hrefs:
                if "MIBs" in href:
                    mib = href
                elif "firmware" in href:
                    item = FirmwareLoader(item=FirmwareImage(),
                                          response=resp,
                                          date_fmt=["%m/%d/%Y"])
                    item.add_value("date", item.find_date(text))
                    # NOTE(review): this collects every href of the segment,
                    # not only the one matched above — confirm that is wanted.
                    item.add_xpath("url", "//a/@href")
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    item.add_value(
                        "version", FirmwareLoader.find_version_period(text))
                    yield item.load_item()
def parse_url(self, response):
    """Recursively walk a directory listing and emit firmware files.

    Links containing ``..`` are ignored. Directory links (ending in ``/``)
    are followed unless their text contains ``package/``; the directory name
    is appended to the product carried in ``response.meta``. Links ending in
    a known firmware extension become items.
    """
    firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

    for link in response.xpath("//a"):
        text = link.xpath("text()").extract()[0]
        href = link.xpath("@href").extract()[0]

        if ".." in href:
            continue

        if href.endswith('/'):
            if "package/" in text:
                continue

            # Drop the trailing character of the link text.
            folder = text[0:-1]
            if "product" in response.meta:
                product = "%s-%s" % (response.meta["product"], folder)
            else:
                product = folder

            yield Request(url=urlparse.urljoin(response.url, href),
                          headers={"Referer": response.url},
                          meta={
                              "version": response.meta["version"],
                              "product": product
                          },
                          callback=self.parse_url)

        elif href.endswith(firmware_suffixes):
            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%d-%b-%Y"])
            loader.add_value("version", response.meta["version"])
            loader.add_value("url", href)
            # The date is searched for in the text nodes following the link.
            trailing_text = link.xpath("following::text()").extract()
            loader.add_value("date", loader.find_date(trailing_text))
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Yield one item per firmware download block of a product page.

    When a version can be found in the block's heading, the GPL source
    blocks are scanned for the same version string and the link of the last
    matching block is attached as ``sdk``.
    """
    # types: firmware = 20, gpl source = 30, bios = 3
    firmware_blocks = "//div[@id='div_type_20']/div[@id='download-os-answer-table']"
    source_blocks = "//div[@id='div_type_30']/div[@id='download-os-answer-table']"

    for entry in response.xpath(firmware_blocks):
        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response, date_fmt=["%Y/%m/%d"])

        heading = entry.xpath("./p//text()").extract()
        version = FirmwareLoader.find_version_period(heading)

        # grab first download link (e.g. DLM instead of global or p2p)
        href = entry.xpath("./table//tr[3]//a/@href").extract()[0]

        # attempt to find matching source code entry
        gpl = None
        if version:
            for source in response.xpath(source_blocks):
                source_heading = "".join(source.xpath("./p//text()").extract())
                if version in source_heading:
                    gpl = source.xpath("./table//tr[3]//a/@href").extract()[0]

        date_cells = entry.xpath("./table//tr[2]/td[1]//text()").extract()
        description_cells = entry.xpath("./table//tr[1]//td[1]//text()").extract()

        loader.add_value("version", version)
        loader.add_value("date", loader.find_date(date_cells))
        loader.add_value("description", " ".join(description_cells))
        loader.add_value("url", href)
        loader.add_value("sdk", gpl)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_model_files(self, response):
    """Yield a firmware item for every entry under ``downloads.firmware``.

    The reply body is parsed as JSON. If the ``downloads`` or ``firmware``
    key is missing, the condition is logged and nothing is yielded. The
    product name is read from ``response.meta['name']``.
    """
    meta = response.meta

    # Due to Python2 and unicode objects, we're using response body here.
    # Issues are from the 'remarks' fields.
    try:
        model_files = json.loads(response.body)['downloads']['firmware']
    except KeyError:
        logging.info("No downloadable firmware for %s", meta)
        return

    # Only the values are needed; the keys were never used.
    for fw_info in model_files.values():
        href = fw_info['links']['global']  # options: {'global', 'europe', 'usa'}
        if not href.startswith((u"https://", u"http://")):
            href = urlparse.urljoin(u"https://", href)

        # date_fmt is a list of formats, as in every other loader here.
        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%Y-%m-%d"])
        item.add_value('product', meta['name'])
        item.add_value('vendor', self.name)
        item.add_value('description', fw_info['releasenote'])
        item.add_value('date', fw_info['published_at'])
        item.add_value('version', fw_info['version'])
        item.add_value('url', href)
        yield item.load_item()
def parse_kb(self, response):
    """Yield firmware items from the downloads section of a support article.

    Reads ``product`` from ``response.meta``. A link whose href contains
    "MIBs" updates the MIB attached to all firmware items yielded after it.
    """
    mib = None

    # need to perform some nasty segmentation because different firmware
    # versions are not clearly separated
    # reverse order to get MIB before firmware items
    for entry in reversed(response.xpath(
            "//div[@id='support-article-downloads']/div/p")):
        for segment in reversed(entry.extract().split("<br><br>")):
            resp = HtmlResponse(
                url=response.url, body=segment, encoding=response.encoding)

            hrefs = resp.xpath("//a/@href").extract()
            if not hrefs:
                continue

            # The segment text is the same for every link in it: read once.
            text = resp.xpath("//text()").extract()

            for href in hrefs:
                if "MIBs" in href:
                    mib = href
                elif "firmware" in href:
                    item = FirmwareLoader(
                        item=FirmwareImage(), response=resp, date_fmt=["%m/%d/%Y"])
                    item.add_value("date", item.find_date(text))
                    # NOTE(review): this collects every href of the segment,
                    # not only the one matched above — confirm that is wanted.
                    item.add_xpath("url", "//a/@href")
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    item.add_value(
                        "version", FirmwareLoader.find_version_period(text))
                    yield item.load_item()
def parse_kb(self, response):
    """Yield firmware items for ``cache-www`` links of a knowledge-base page.

    The rich-text block is cut into regions at every "=-" and each region is
    parsed as its own HTML document. The version is searched for in the text
    following the link first, then in the whole region.
    """
    # initial html tokenization to find regions segmented by e.g. "======"
    # or "------"
    richtext = response.xpath("//div[@class='sfdc_richtext']").extract()[0]

    for region in richtext.split("=-"):
        entry = region and region.strip()
        resp = HtmlResponse(url=response.url,
                            body=entry,
                            encoding=response.encoding)

        for link in resp.xpath("//a"):
            href = link.xpath("@href").extract()[0]
            if "cache-www" not in href:
                continue

            text = resp.xpath("//text()").extract()
            text_next = link.xpath("following::text()").extract()

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

            version = FirmwareLoader.find_version_period(text_next)
            if not version:
                version = FirmwareLoader.find_version_period(text)

            loader.add_value("version", version)
            loader.add_value("date", loader.find_date(text))
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Yield one item per firmware download block of a product page.

    When a version can be found in the block's heading, the GPL source
    blocks are scanned for the same version string and the link of the last
    matching block is attached as ``sdk``.
    """
    # types: firmware = 20, gpl source = 30, bios = 3
    firmware_blocks = "//div[@id='div_type_20']/div[@id='download-os-answer-table']"
    source_blocks = "//div[@id='div_type_30']/div[@id='download-os-answer-table']"

    for entry in response.xpath(firmware_blocks):
        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response, date_fmt=["%Y/%m/%d"])

        heading = entry.xpath("./p//text()").extract()
        version = FirmwareLoader.find_version_period(heading)

        # grab first download link (e.g. DLM instead of global or p2p)
        href = entry.xpath("./table//tr[3]//a/@href").extract()[0]

        # attempt to find matching source code entry
        gpl = None
        if version:
            for source in response.xpath(source_blocks):
                source_heading = "".join(source.xpath("./p//text()").extract())
                if version in source_heading:
                    gpl = source.xpath("./table//tr[3]//a/@href").extract()[0]

        date_cells = entry.xpath("./table//tr[2]/td[1]//text()").extract()
        description_cells = entry.xpath("./table//tr[1]//td[1]//text()").extract()

        loader.add_value("version", version)
        loader.add_value("date", loader.find_date(date_cells))
        loader.add_value("description", " ".join(description_cells))
        loader.add_value("url", href)
        loader.add_value("sdk", gpl)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Recursively walk a tabular directory listing and emit firmware files.

    The first directory level supplies the product name and the next one the
    build (with the word "build" removed); both travel in ``response.meta``.
    Files ending in a known firmware extension become items.
    """
    firmware_suffixes = (".bin", ".elf", ".fdt", ".imx", ".chk", ".trx")

    for row in response.xpath("//table//tr"):
        if not row.xpath("./td[2]/a"):
            continue

        text = row.xpath("./td[2]/a/text()").extract()[0]
        href = row.xpath("./td[2]//@href").extract()[0]

        if ".." in href:
            continue

        if href.endswith("/"):
            build = response.meta.get("build", None)
            product = response.meta.get("product", None)

            # Fill whichever level is still unknown, product first.
            if not product:
                product = text
            elif not build:
                build = text.replace("build", "")

            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                meta={"build": build, "product": product},
                callback=self.parse,
            )

        elif href.endswith(firmware_suffixes):
            # The version is looked up among the "-"-separated parts of the
            # file name without its extension.
            name_parts = os.path.splitext(text)[0].split("-")
            date_cells = row.xpath("./td[3]/text()").extract()

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response, date_fmt=["%Y-%m-%d"])
            loader.add_value("build", response.meta["build"])
            loader.add_value("url", href)
            loader.add_value("version",
                             FirmwareLoader.find_version_period(name_parts))
            loader.add_value("date", loader.find_date(date_cells))
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_model_page(self, response):
    """Yield firmware items from the download accordion of a model page.

    An entry is used only when its title mentions "Firmware" but not
    "Upgrade", its link is neither a confirm-download anchor nor a
    knowledge-base URL, and the title contains the word "Version" followed
    by another token, which is taken as the version. Entries that lack a
    title, a link or a version are skipped.
    """
    for entry in response.xpath(
            "//section[@id='topicsdownload']//div[@class='col topic']/"
            "section[@class='box articles']//div[@class='accordion-item']"):
        name = entry.xpath(
            "./a[@class='accordion-title']/h1/text()").extract_first()
        url = entry.xpath(
            "./div[@class='accordion-content']//a/@href").extract_first()

        # extract_first() returns None when nothing matches.
        if not name or not url:
            continue

        if '#confirm-download-' in url or 'http://kb.netgear.com/' in url:
            continue

        if 'Firmware' not in name or 'Upgrade' in name:
            continue

        # only continue if there is a version number
        name_split = name.split(" ")
        try:
            version = name_split[name_split.index('Version') + 1]
        except (ValueError, IndexError):
            # No "Version" token, or nothing after it.
            continue

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("version", version)
        item.add_value("url", url)
        item.add_value("product", response.meta["product"])
        item.add_value("vendor", self.name)
        yield item.load_item()
def parse_kb(self, response):
    """Yield firmware items for ``cache-www`` links of a knowledge-base page.

    The rich-text block is cut into regions at every "=-" and each region is
    parsed as its own HTML document. The version is searched for in the text
    following the link first, then in the whole region.
    """
    # initial html tokenization to find regions segmented by e.g. "======"
    # or "------"
    richtext = response.xpath("//div[@class='sfdc_richtext']").extract()[0]

    for region in richtext.split("=-"):
        entry = region and region.strip()
        resp = HtmlResponse(url=response.url, body=entry,
                            encoding=response.encoding)

        for link in resp.xpath("//a"):
            href = link.xpath("@href").extract()[0]
            if "cache-www" not in href:
                continue

            text = resp.xpath("//text()").extract()
            text_next = link.xpath("following::text()").extract()

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%b %d, %Y", "%B %d, %Y", "%m/%d/%Y"])

            version = FirmwareLoader.find_version_period(text_next)
            if not version:
                version = FirmwareLoader.find_version_period(text)

            loader.add_value("version", version)
            loader.add_value("date", loader.find_date(text))
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Yield firmware items described by a ``var commonInfo`` script.

    Responses whose text does not start with "var commonInfo" are ignored.
    Five fields are collected independently with regular expressions and
    combined by position.
    """
    import re

    js = response.text
    if not js.startswith("var commonInfo"):
        return

    # Debug output kept from the original; the call form works on Python 2
    # and Python 3 alike.
    print(response.url)
    print(js)

    p_product = u'id:"(?P<product>.*?)"'
    p_description = u'title:"(?P<description>.*?)"'
    p_version = u'romVersions:"(?P<version>.*?)"'
    p_url = u'romUrl:"(?P<url>.*?)"'
    p_date = u'updateDate:"(?P<date>.*?)"'

    # The product list previously reused the version pattern, which put the
    # version string into the "product" field.
    products = re.findall(p_product, js)
    descriptions = re.findall(p_description, js)
    versions = re.findall(p_version, js)
    urls = re.findall(p_url, js)
    dates = re.findall(p_date, js)

    # NOTE(review): assumes every record has exactly one of each field so
    # the lists line up by position — confirm against a live response.
    # zip() stops at the shortest list rather than raising IndexError.
    records = zip(products, descriptions, versions, urls, dates)
    for product, description, version, url, date in records:
        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("description", description)
        item.add_value("version", version)
        item.add_value("date", date)
        item.add_value("vendor", self.name)
        yield item.load_item()
def parse_product(self, response):
    """Expand the hardware-revision drop-down, then emit firmware items.

    When the page has a revision drop-down and no ``build`` has been chosen
    yet, every option is requested with its text as ``build``. Otherwise
    each firmware table becomes an item, with the first GPL code link of the
    page (if any) attached as ``sdk``.
    """
    has_dropdown = response.xpath("//dl[@id='dlDropDownBox']")

    if has_dropdown and "build" not in response.meta:
        for option in response.xpath("//dl[@id='dlDropDownBox']//li/a"):
            href = option.xpath("./@href").extract()[0]
            text = option.xpath(".//text()").extract()[0]
            yield Request(
                url=urlparse.urljoin(response.url, href),
                meta={"product": response.meta["product"], "build": text},
                headers={"Referer": response.url},
                callback=self.parse_product,
            )
        return

    # First GPL link of the page, or None when there is none.
    gpl_links = response.xpath("//div[@id='content_gpl_code']//a/@href").extract()
    sdk = gpl_links[0] if gpl_links else None

    if "build" in response.meta:
        build = response.meta["build"]
    else:
        build = None

    for table in response.xpath("//div[@id='content_firmware']//table"):
        href = table.xpath("./tbody/tr[1]/th[1]//a/@href").extract()[0]
        text = table.xpath("./tbody/tr[1]/th[1]//a//text()").extract()[0]
        date = table.xpath("./tbody/tr[1]/td[1]//text()").extract()

        loader = FirmwareLoader(item=FirmwareImage(), response=response,
                                date_fmt=["%d/%m/%y"])
        loader.add_value("url", href)
        loader.add_value("date", loader.find_date(date))
        loader.add_value("description", text)
        loader.add_value("product", response.meta["product"])
        loader.add_value("build", build)
        loader.add_value("vendor", self.vendor)
        loader.add_value("sdk", sdk)
        yield loader.load_item()
def parse(self, response):
    """Follow category links and emit firmware download links.

    Links that are not downloads (no ``_a=download`` in the href) are
    followed, with the words of the link text as ``product``. Download links
    whose text mentions "firmware" or "f/w" become items; version and date
    are searched for in the text of the surrounding cell.
    """
    cells = response.xpath(
        "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]")

    for cell in cells:
        desc = cell.xpath(".//text()").extract()

        for anchor in cell.xpath(".//a"):
            href = anchor.xpath("./@href").extract()[0]
            text = anchor.xpath(".//text()").extract()[0]

            if "_a=download" not in href:
                yield Request(url=urllib.parse.urljoin(response.url, href),
                              headers={"Referer": response.url},
                              meta={"product": text.strip().split(' ')},
                              callback=self.parse)
                continue

            label = text.lower()
            if "firmware" not in label and "f/w" not in label:
                continue

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%m/%d/%Y", "%m/%d/%y"])
            loader.add_value("version", FirmwareLoader.find_version(desc))
            loader.add_value("date", loader.find_date(desc))
            loader.add_value("description", text)
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product_firmware(self, response):
    """Yield one firmware item per download button of a product page.

    The item date is the page's update date when one is listed, otherwise
    the creation date (an empty string when neither is present). The product
    name is read from ``response.meta``.
    """
    # Get product name
    product = response.meta["product"]

    # Get the product last updated date
    create_date = ''
    for li_elem in response.xpath('//li'):
        css_class = li_elem.xpath('@class')
        if css_class.re(r'(\[hide_empty:create_date\])'):
            create_date = li_elem.xpath(
                './/span[@class="badge"]/text()').extract_first()
        elif css_class.re(r'(\[hide_empty:update_date\])'):
            update_date = li_elem.xpath(
                './/span[@class="badge"]/text()').extract_first()
            break
    else:
        # No update date listed: fall back to the creation date. This also
        # keeps update_date bound when the page has no <li> at all.
        update_date = create_date

    # File list table of downloads
    file_table = response.xpath(
        '//table[@class="wpdm-filelist table table-hover"]')

    for dl_button in file_table.xpath(
            './/a[@class="inddl btn btn-primary btn-sm"]'):
        href = dl_button.xpath("@href")

        # date_fmt is a list of formats, as in every other loader here.
        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%B %d, %Y"])
        item.add_value("product", product)
        item.add_value("vendor", self.name)
        item.add_value("date", update_date)
        item.add_value("url", href.extract_first())
        yield item.load_item()
def parse(self, response):
    """Follow category links and emit firmware download links.

    Links that are not downloads (no ``_a=download`` in the href) are
    followed, with the words of the link text as ``product``. Download links
    whose text mentions "firmware" or "f/w" become items; version and date
    are searched for in the text of the surrounding cell.
    """
    cells = response.xpath(
        "//div[@class='menu2']//table//table//table[2]//td[1]//td[2]")

    for cell in cells:
        desc = cell.xpath(".//text()").extract()

        for anchor in cell.xpath(".//a"):
            href = anchor.xpath("./@href").extract()[0]
            text = anchor.xpath(".//text()").extract()[0]

            if "_a=download" not in href:
                yield Request(
                    url=urlparse.urljoin(response.url, href),
                    headers={"Referer": response.url},
                    meta={"product": text.strip().split(' ')},
                    callback=self.parse)
                continue

            label = text.lower()
            if "firmware" not in label and "f/w" not in label:
                continue

            loader = FirmwareLoader(item=FirmwareImage(),
                                    response=response,
                                    date_fmt=["%m/%d/%Y", "%m/%d/%y"])
            loader.add_value("version", FirmwareLoader.find_version(desc))
            loader.add_value("date", loader.find_date(desc))
            loader.add_value("description", text)
            loader.add_value("url", href)
            loader.add_value("product", response.meta["product"])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse(self, response):
    """Enumerate firmware detail pages and emit the firmware they list.

    Without a ``pid`` in ``response.meta`` this schedules the detail pages
    for ids 0 to 999. With one, every row of the download table (after the
    header) is emitted once per product name shown on the page. Rows whose
    version contains "_p", or that lack a version or a link, are skipped.
    """
    # bit ugly but it works :-)
    if "pid" not in response.meta:
        for pid in range(0, 1000):
            yield Request(url=urllib.parse.urljoin(
                response.url, "firmware_details.html?id=%s" % pid),
                          meta={"pid": pid},
                          headers={
                              "Referer": response.url,
                              "X-Requested-With": "XMLHttpRequest"
                          },
                          callback=self.parse)
        return

    products = response.xpath(
        "//div[@class='download_list_icon']/span/text()").extract()
    if not products:
        return

    # The table is the same for every product name: query it once and
    # skip the table header.
    rows = response.xpath("//table[@class='down_table']//tr")[1:]

    for product in products:
        for row in rows:
            version = row.xpath('td[1]//text()').extract_first()
            # An empty cell used to raise TypeError and abort the page.
            if version is None:
                continue
            # skip partial versions
            if '_p' in version:
                continue

            href = row.xpath('td[6]//a/@href').extract_first()
            if href is None:
                continue

            item = FirmwareLoader(item=FirmwareImage(), response=response)
            item.add_value("version", version)
            item.add_value("url", 'https://www.foscam.com' + href)
            item.add_value("product", product)
            item.add_value("vendor", self.name)
            yield item.load_item()
def parse(self, response):
    """Recursively walk a download listing and emit firmware files.

    The software family is derived from the response URL. Directory links
    are followed, carrying ``version`` and ``build`` in the request meta;
    any other link that is not a ``.txt``, ``.md5`` or ``.torrent`` file
    becomes an item whose product is derived from the file name.
    """
    for entry in response.xpath("//table/tr[position() > 3]"):
        if not entry.xpath("./td[2]/a"):
            continue

        text = entry.xpath("./td[2]/a//text()").extract()[0]
        href = entry.xpath("./td[2]/a/@href").extract()[0]
        date = entry.xpath("./td[3]//text()").extract()[0]

        # First match wins; "DSMUC" is tested before its substring "DSM".
        software = next(
            (tag for tag in ("DSMUC", "DSM", "VSM", "VSF", "SRM")
             if tag in response.url),
            None)
        if software is None:
            continue  # should not happen :-)

        if href.endswith('/'):
            found_version = FirmwareLoader.find_version_period([text])
            version = response.meta.get("version", found_version)
            # A directory without a version in its name is a build
            # directory; drop the trailing character of its link text.
            build = None if found_version else text[0: -1]

            yield Request(
                url=urllib.parse.urljoin(response.url, href),
                headers={"Referer": response.url},
                meta={"build": build, "version": version},
                callback=self.parse)

        elif not href.lower().endswith((".txt", ".md5", ".torrent")):
            basename = os.path.splitext(text)[0].split("_")

            if software in basename:
                if response.meta["build"] in basename:
                    basename.remove(response.meta["build"])
                basename.remove(software)
                product = " ".join(basename)
            else:
                # usually "synology_x86_ds13_1504
                product = basename[-2]

            loader = FirmwareLoader(
                item=FirmwareImage(), response=response, date_fmt=["%d-%b-%Y"])
            loader.add_value("build", response.meta["build"])
            loader.add_value("version", response.meta["version"])
            if software == "DSM":
                loader.add_value(
                    "mib",
                    "https://global.download.synology.com/download/Document/Software/"
                    "DeveloperGuide/Firmware/DSM/All/enu/Synology_MIB_File.zip")
            loader.add_value("url", href)
            loader.add_value("date", date)
            loader.add_value("product", product)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_download(self, response):
    """Yield a firmware item for every download table mentioning firmware.

    The download URL is the second quote-delimited piece of the link's
    ``onclick`` attribute, with the query parameters that confirm the
    download appended.
    """
    for table in response.xpath("//div[@class='downloadtable']"):
        all_text = " ".join(table.xpath(".//text()").extract())
        if "firmware" not in all_text.lower():
            continue

        text = table.xpath(
            ".//li[@class='maindescription' and position() = 1]//text()"
        ).extract()
        date = table.xpath(
            ".//li[@class='maindescription' and position() = 2]//text()"
        ).extract()

        onclick = table.xpath(
            ".//li[@class='maindescription']//a/@onclick"
        ).extract()[0]
        href = onclick.split('\'')[1] + "&button=Continue+with+Download&Continue=yes"

        loader = FirmwareLoader(item=FirmwareImage(),
                                response=response,
                                date_fmt=["%m/%d/%Y"])
        loader.add_value("url", href)
        loader.add_value("product", response.meta["product"])
        loader.add_value("date", loader.find_date(date))
        loader.add_value("version", FirmwareLoader.find_version(text))
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Emit one item per URL in ``self.firmware``.

    The product is the part of the URL's last path segment that precedes
    the first underscore. ``response`` is not consulted.
    """
    for firmware_url in self.firmware:
        filename = firmware_url.split("/")[-1]

        loader = FirmwareLoader(item=FirmwareImage())
        loader.add_value("url", firmware_url)
        loader.add_value("product", filename.split("_")[0])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_product(self, response):
    """Load the firmware download advertised on a product page.

    Pages without a ``#Firmware`` tab link produce nothing. Otherwise the
    URL and date are read from the ``content_Firmware`` table, the
    description from the ``product-name`` block, and product/version from
    ``response.meta``.
    """
    # <a href="#Firmware"><span>Firmware</span></a>
    if not response.xpath("//a[@href=\"#Firmware\"]").extract():
        # Stop here: without a firmware tab the [0] lookups below would
        # run against a page that has no firmware table.
        return

    description = response.xpath(
        "//div[@class=\"product-name\"]//strong/text()").extract()[0]
    url = response.xpath(
        "//*[@id=\"content_Firmware\"]/table/tbody/tr[1]/th/a/@href"
    ).extract()[0]
    date = response.xpath(
        "//*[@id=\"content_Firmware\"]/table/tbody/tr[2]/td[1]/span[2]/text()"
    ).extract()[0]

    item = FirmwareLoader(item=FirmwareImage(),
                          response=response, date_fmt=["%d/%m/%y"])
    item.add_value("url", url)
    item.add_value("date", item.find_date(date))
    item.add_value("description", description)
    item.add_value("product", response.meta["product"])
    item.add_value("version", response.meta["version"])
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse_product(self, response):
    """Load the firmware described by the first table of the download box.

    Row 1 holds the title (used as description and split into
    product/build), row 4 the date, and row 5 the download link.
    """
    first_table = "//div[@class='download']/table[1]"

    desc = response.xpath(first_table + "//tr[1]/td[2]//text()").extract()[0]
    date = response.xpath(first_table + "//tr[4]/td[2]//text()").extract()
    href = response.xpath(first_table + "//tr[5]/td[2]/a/@href").extract()[0]

    # Drop non-ASCII characters but stay in text: keeping the encoded bytes
    # makes the "_" / " " membership tests below raise TypeError on Python 3.
    text = desc.encode("ascii", errors="ignore").decode("ascii")

    build = None
    product = None
    if "_" in text:
        parts = text.split("_")
        product, build = parts[0], parts[1]
    elif " " in text:
        product = text.split(" ")[0]

    item = FirmwareLoader(item=FirmwareImage(),
                          response=response, date_fmt=["%Y/%m/%d"])
    # URL and description are passed as text, not as encoded bytes.
    item.add_value("url", href)
    item.add_value("date", item.find_date(date))
    item.add_value("description", desc)
    item.add_value("build", build)
    item.add_value("product", product)
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse_product(self, response):
    """Emit an item for every download link in the ``mainbox`` list.

    The link text is split on spaces: the last word is the version and the
    first word the product. When the first word starts with a model name
    followed by a hardware revision, only the model name is kept.
    """
    import re

    # Link texts look like:
    #   FH456V1.0 Firmware V10.1.1.1_EN
    #   E101(V2.0) Firmware V1.10.0.1_EN
    #   G3(V2.0) Firmware V2.0.0.1_EN
    #   O3 Firmware V1.0.0.3_EN
    #   i6 Firmware V1.0.0.9(3857)_EN
    # The model name is the leading run of characters other than "v"/"V",
    # optionally followed by "(" or a full-width "（" and a revision "Vn.0".
    model_pattern = re.compile(
        u'^(?P<product>([a-uw-zA-UW-Z0-9])+)[\\(\uff08]?(V\\d\\.0)?')

    for a in response.xpath("//div[@id='mainbox']//dd/a"):
        url = a.xpath("./@href").extract()[0]
        title = a.xpath("./text()").extract()[0]

        items = title.split(' ')
        version = items[-1]

        # Fall back to the raw first word when the pattern does not match.
        product = items[0]
        ret = model_pattern.search(items[0])
        if ret:
            product = ret.group('product')

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("version", version)
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
def parse_product(self, response):
    """Emit an item for every download link in the ``mainbox`` list.

    The link text is split on spaces: the last word is the version and the
    first word the product. When the first word starts with a model name
    followed by a hardware revision, only the model name is kept.
    """
    import re

    # Link texts look like:
    #   FH456V1.0 Firmware V10.1.1.1_EN
    #   E101(V2.0) Firmware V1.10.0.1_EN
    #   G3(V2.0) Firmware V2.0.0.1_EN
    #   O3 Firmware V1.0.0.3_EN
    #   i6 Firmware V1.0.0.9(3857)_EN
    # The model name is the leading run of characters other than "v"/"V",
    # optionally followed by "(" or a full-width "（" and a revision "Vn.0".
    # Written as a u'' literal with doubled backslashes because the ur''
    # prefix is a syntax error on Python 3.
    model_pattern = re.compile(
        u'^(?P<product>([a-uw-zA-UW-Z0-9])+)[\\(\uff08]?(V\\d\\.0)?')

    for a in response.xpath("//div[@id='mainbox']//dd/a"):
        url = a.xpath("./@href").extract()[0]
        title = a.xpath("./text()").extract()[0]

        items = title.split(' ')
        version = items[-1]

        # Fall back to the raw first word when the pattern does not match.
        product = items[0]
        ret = model_pattern.search(items[0])
        if ret:
            product = ret.group('product')

        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("version", version)
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
def parse_product(self, response):
    """Load the firmware described by the first table of the download box.

    Row 1 holds the title (used as description and split into
    product/build), row 4 the date, and row 5 the download link.
    """
    first_table = "//div[@class='download']/table[1]"

    desc = response.xpath(first_table + "//tr[1]/td[2]//text()").extract()[0]
    date = response.xpath(first_table + "//tr[4]/td[2]//text()").extract()
    href = response.xpath(first_table + "//tr[5]/td[2]/a/@href").extract()[0]

    # Drop non-ASCII characters but stay in text: keeping the encoded bytes
    # makes the "_" / " " membership tests below raise TypeError on Python 3.
    text = desc.encode("ascii", errors="ignore").decode("ascii")

    build = None
    product = None
    if "_" in text:
        parts = text.split("_")
        product, build = parts[0], parts[1]
    elif " " in text:
        product = text.split(" ")[0]

    item = FirmwareLoader(item=FirmwareImage(), response=response, date_fmt=["%Y/%m/%d"])
    # URL and description are passed as text, not as encoded bytes.
    item.add_value("url", href)
    item.add_value("date", item.find_date(date))
    item.add_value("description", desc)
    item.add_value("build", build)
    item.add_value("product", product)
    item.add_value("vendor", self.vendor)
    yield item.load_item()
def parse_product(self, response):
    """Extract firmware entries from a JavaScript data file.

    The response body is scanned for ``id:"…"``, ``title:"…"``,
    ``romVersions:"…"``, ``romUrl:"…"`` and ``updateDate:"…"`` fields. The
    n-th occurrence of each field is combined into the n-th item.
    """
    import re

    js = response.text
    if js.startswith("var commonInfo"):
        # Debug output; single-argument print() behaves the same on
        # Python 2 and 3.
        print(response.url)
        print(js)

    p_product = u"id:\"(?P<product>.*?)\""
    p_description = u"title:\"(?P<description>.*?)\""
    p_version = u"romVersions:\"(?P<version>.*?)\""
    p_url = u"romUrl:\"(?P<url>.*?)\""
    p_date = u"updateDate:\"(?P<date>.*?)\""

    # Products come from the id field; reading them with the version
    # pattern would load version strings as product names.
    products = re.findall(p_product, js)
    descriptions = re.findall(p_description, js)
    versions = re.findall(p_version, js)
    urls = re.findall(p_url, js)
    dates = re.findall(p_date, js)

    # zip() stops at the shortest list, so a field missing from the last
    # entry cannot raise IndexError.
    # NOTE(review): assumes the fields occur once per entry and in the same
    # order — confirm against a real response.
    for product, description, version, url, date in zip(
            products, descriptions, versions, urls, dates):
        item = FirmwareLoader(item=FirmwareImage(), response=response)
        item.add_value("url", url)
        item.add_value("product", product)
        item.add_value("version", version)
        item.add_value("description", description)
        item.add_value("date", date)
        item.add_value("vendor", self.name)
        yield item.load_item()
def parse_product_sw_fw(self, response):
    """Emit an item for every row of the "Firmware" downloads section.

    Sections are indexed by the text of their column-name elements. If no
    section has a "Firmware" column the method logs that and yields nothing.
    Rows for which ``self.extract_fw_info`` returns no href are skipped.
    """
    product = response.meta['product']

    # Map each column heading to the section it belongs to.
    col_selector_map = {}
    # Find the "Firmware" section. NOTE: whitespace in the class is intentional
    for section in response.css('div.docs-table__section '):
        for col in section.css('div.docs-table__column-name'):
            # extract_first() returns None for a heading without text.
            col_text = (col.xpath('.//text()').extract_first() or '').strip()
            if len(col_text) > 1:
                col_selector_map[col_text] = section

    # "is None" rather than truthiness: do not depend on selector truth
    # values for the presence check.
    fw_sect = col_selector_map.get(u'Firmware')
    if fw_sect is None:
        logging.debug("Did not find a 'Firmware' section in the downloads for %s", product)
        return

    # Iterate Firmware rows
    for fw_row in fw_sect.css('div.docs-table__row'):
        fw_version, fw_href, fw_date, fw_desc = self.extract_fw_info(fw_row, response)
        if fw_href is None:
            continue

        item = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%m/%d/%y"])
        item.add_value('product', product)
        item.add_value('vendor', self.name)
        item.add_value('url', fw_href)
        # The version is extracted with the other fields and belongs on
        # the item as well.
        item.add_value('version', fw_version)
        item.add_value('description', fw_desc)
        item.add_value('date', fw_date)
        yield item.load_item()
def parse(self, response):
    """Walk the product groups of an overview page.

    Each ``div.item`` names a category in its ``h2`` (inside a ``span`` when
    present). Links ending in ``/`` are followed to ``parse_product`` with
    category and product in ``meta``; any other link is loaded as a firmware
    item directly.
    """
    for product_group in response.css("div.item"):
        # Prefer the <span> inside the heading, else the heading's own text.
        category = product_group.css("h2 span::text").get()
        if category is None:
            category = product_group.css("h2::text").get()
        category = (category or "").strip()

        for product in product_group.css("a"):
            link = product.css("::attr(href)").get()
            if not link:
                continue  # anchor without a target: nothing to follow or load

            model = (product.css("::text").get() or "").strip()

            if link.endswith("/"):
                yield response.follow(link,
                                      meta={
                                          "category": category,
                                          "product": model
                                      },
                                      callback=self.parse_product)
            else:
                item = FirmwareLoader(item=FirmwareImage(),
                                      date_fmt=["%y-%m-%d"])
                item.add_value("vendor", self.vendor)
                item.add_value("url", link)
                item.add_value("product", model)
                item.add_value("category", category)
                yield item.load_item()
def parse_json(self, response):
    """Turn a JSON list of downloads into firmware items.

    Every entry's ``showName`` is matched against the known naming layouts
    to split it into product, version, date and build. Entries matching no
    layout are still emitted with only url, description and vendor. The
    download URL is built from the entry's ``fileName``.
    """
    # (pattern, group holding the date, groups joined by " " into the build)
    name_layouts = (
        # Model, Version, Date, Build
        (re.compile(r'^(.+) (V[\d\.]+)([^\d]+)(\d+)_([\d\.]+)$'), 4, (5,)),
        # TL-NVR5104 V1.0_171205.标准版
        (re.compile(r'^(.+)[_ ]([vV][\d\.]+)([^\d]*)_([\d]+)([^\d]+)$'), 4, (5,)),
        # TL-IPC545K(P) V3.0_180227(1.0.14)标准版
        # The parentheses around the build are literal and must be escaped;
        # unescaped they only create a nested group and the example above
        # never matches.
        (re.compile(r'^(.+)[_ ](V[\d\.]+)_(\d+)\(([\d\.]+)\)([^\d]+)$'), 3, (4, 5)),
    )

    resp = json.loads(response.text)
    self.logger.debug(resp)

    for product in resp:
        name = product['showName'].strip()

        item = FirmwareLoader(item=FirmwareImage(),
                              response=response,
                              date_fmt=["%Y%m%d"])

        self.logger.debug("Parsing '%s'", name)
        for pattern, date_group, build_groups in name_layouts:
            match = pattern.search(name)
            if match is None:
                continue

            self.logger.debug(match.groups())
            item.add_value("product", match.group(1))
            item.add_value("version", match.group(2))

            date = match.group(date_group)
            if len(date) == 6:
                # Two-digit year: expand to the four digits date_fmt expects.
                date = "20" + date
            item.add_value("date", date)
            item.add_value(
                "build", ' '.join(match.group(g) for g in build_groups))
            break
        else:
            self.logger.debug("No match for %s", name)

        url = 'http://service.tp-link.com.cn/download/' + quote(product['fileName'])
        print(url)

        item.add_value("url", url)
        item.add_value("description", name)
        item.add_value("vendor", self.vendor)
        yield item.load_item()
def parse_product(self, response):
    """Load the item behind the first link inside the ``thumbnail`` block.

    Version and product are taken from ``response.meta``.
    """
    thumbnail_links = response.xpath(
        "//div[@class='thumbnail']//a/@href").extract()
    download_url = thumbnail_links[0]

    loader = FirmwareLoader(item=FirmwareImage(), response=response)
    for field, value in (("version", response.meta['version']),):
        loader.add_value(field, value)
    loader.add_value("url", download_url)
    loader.add_value("product", response.meta['product'])
    loader.add_value("vendor", self.vendor)
    yield loader.load_item()
def parse_product(self, response):
    """Load the firmware linked from the page's ``downLoadHref`` anchor.

    The anchor's href is appended to ``self.firmware_url``. Date,
    description and product are taken from ``response.meta``.
    """
    url = self.firmware_url + response.xpath(
        '//a[@id="downLoadHref"]/@href').extract()[0]

    item = FirmwareLoader(item=FirmwareImage(), response=response)
    # The date in meta is a value, not an XPath expression, so it goes
    # through add_value like the other meta fields.
    item.add_value("date", response.meta['date'])
    item.add_value("description", response.meta['description'])
    item.add_value("url", url)
    item.add_value("product", response.meta["product"])
    item.add_value("vendor", self.name)
    yield item.load_item()
def parse(self, response):
    """Handle both product overview pages and per-product download pages.

    With ``product`` in ``response.meta`` the page is treated as a download
    table: every row after the first becomes a firmware item unless its
    link or label marks it as documentation or companion software. Without
    it, the ``product-listing`` entries are scanned and HTML pages are
    followed with the link text as the product name.
    """
    if "product" in response.meta:
        # remove unnecessary files
        to_remove_list = (
            "end user license agreement", "eula", "release notes",
            "mac os", "windows", "guide", "(pdf)", "sample", "client",
            "manager", "software", "virtual", "control_panel",
            "activexbypass",
        )
        skipped_suffixes = ("htm", "html", "pdf", "ova", ".plcm.vc")

        for entry in response.xpath("//div[@class='tab-content']//tr")[1:]:
            version = entry.xpath("./td[1]//a//text()").extract_first()
            url = entry.xpath("./td[2]//a/@href").extract_first()

            if version is None or url is None:
                continue

            url_lower = url.lower()
            version_lower = version.lower()
            if any(x in url_lower for x in to_remove_list) \
                    or any(x in version_lower for x in to_remove_list) \
                    or url.endswith(skipped_suffixes):
                continue

            # No trailing comma here: the URL is a single string, not a
            # one-element tuple.
            url = urllib.parse.urljoin(response.url,
                                       PolycomSpider.fix_url(url))

            item = FirmwareLoader(item=FirmwareImage(), response=response)
            item.add_value("version", version)
            item.add_value("url", url)
            item.add_value("product", response.meta["product"])
            item.add_value("vendor", self.name)
            yield item.load_item()

    # all entries on the product overview pages
    elif response.xpath("//div[@class='product-listing']"):
        for entry in response.xpath("//div[@class='product-listing']//li"):
            if not entry.xpath("./a"):
                continue

            text = entry.xpath("./a//text()").extract_first()
            href = entry.xpath("./a/@href").extract_first()
            if text is None or href is None:
                continue  # link without label or target
            href = href.strip()

            text_lower = text.lower()
            if any(x in text_lower for x in
                   ("advisories", "support", "notices", "features")) \
                    or href.endswith(".pdf"):
                continue

            path = urllib.parse.urlparse(href).path
            if path.endswith((".htm", ".html")) or "(html)" in text_lower:
                yield Request(url=urllib.parse.urljoin(
                                  response.url, PolycomSpider.fix_url(href)),
                              meta={"product": text},
                              headers={"Referer": response.url},
                              callback=self.parse)
def parse(self, response):
    """Emit an item for every link in the second column of any table.

    The link's ``title`` attribute is stored as the description and passed
    to ``self.parse_product`` to obtain the product value.
    """
    for anchor in response.xpath("//table//tr//td[2]//a"):
        label = anchor.xpath('./@title').extract()[0]
        target = anchor.xpath('./@href').extract()[0]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("url", target)
        loader.add_value("product", self.parse_product(label))
        loader.add_value("description", label)
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_product(self, response):
    """Load the item behind the first link inside the ``thumbnail`` block.

    Version and product are taken from ``response.meta``.
    """
    thumbnail_links = response.xpath(
        "//div[@class='thumbnail']//a/@href").extract()
    download_url = thumbnail_links[0]

    loader = FirmwareLoader(item=FirmwareImage(), response=response)
    loader.add_value("version", response.meta['version'])
    loader.add_value("url", download_url)
    loader.add_value("product", response.meta['product'])
    loader.add_value("vendor", self.vendor)
    yield loader.load_item()
def download_item(self, response):
    """Load the firmware behind the first ``downbtns`` link of the page.

    The href is prefixed with ``https:``. Version, date and product are
    taken from ``response.meta``. Pages without such a link yield nothing.
    """
    href = response.xpath("//div[@class='downbtns']/a/@href").extract_first()
    if href is None:
        # Without this guard the item would carry the URL "https:None".
        return

    # NOTE(review): presumably the href is protocol-relative ("//host/…") —
    # confirm against a real page.
    url = "https:" + str(href)

    item = FirmwareLoader(item=FirmwareImage(),
                          response=response, date_fmt=["%Y-%m-%d"])
    item.add_value("url", url)
    item.add_value("version", response.meta["version"])
    item.add_value("date", response.meta["date"])
    item.add_value("product", response.meta["product"])
    item.add_value("vendor", self.name)
    yield item.load_item()
def parse_product(self, response):
    """Load the firmware linked from the page's ``downLoadHref`` anchor.

    The anchor's href is appended to ``self.firmware_url``. Date,
    description and product are taken from ``response.meta``.
    """
    url = self.firmware_url + response.xpath(
        '//a[@id="downLoadHref"]/@href').extract()[0]

    item = FirmwareLoader(item=FirmwareImage(), response=response)
    # The date in meta is a value, not an XPath expression, so it goes
    # through add_value like the other meta fields.
    item.add_value("date", response.meta['date'])
    item.add_value("description", response.meta['description'])
    item.add_value("url", url)
    item.add_value("product", response.meta["product"])
    item.add_value("vendor", self.name)
    yield item.load_item()
def parse(self, response):
    """Emit an item for every link in the second column of any table.

    The link's ``title`` attribute is stored as the description and passed
    to ``self.parse_product`` to obtain the product value.
    """
    for anchor in response.xpath("//table//tr//td[2]//a"):
        label = anchor.xpath('./@title').extract()[0]
        target = anchor.xpath('./@href').extract()[0]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("url", target)
        loader.add_value("product", self.parse_product(label))
        loader.add_value("description", label)
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Emit an item for every link on the page that ends in ``.img``.

    The version is the last dash-separated piece of the file name, cut off
    at the final ``.img``. Product and vendor are both ``self.name``.
    """
    for target in response.xpath("//a/@href").extract():
        if not target.endswith(".img"):
            continue

        filename_parts = target.split("/")[-1].split("-")
        tail = filename_parts[-1]

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("url", target)
        loader.add_value("product", self.name)
        loader.add_value("vendor", self.name)
        loader.add_value("version", tail[:tail.rfind(".img")])
        yield loader.load_item()
def parse(self, response):
    """Crawl a link listing, loading ``.bin``/``.upg`` files as items.

    ``..`` and ``/`` are ignored. Any other link containing a slash is
    requested and parsed by this same callback.
    """
    for target in response.xpath("//a/@href").extract():
        if target in ("..", "/"):
            continue

        if target.endswith((".bin", ".upg")):
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", target)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
            continue

        if "/" in target:
            yield Request(url=urllib.parse.urljoin(response.url, target),
                          headers={"Referer": response.url},
                          callback=self.parse)
def parse(self, response):
    """Emit items from the two paragraph groups inside ``main_right``.

    The first ``span`` is read in strides of seven paragraphs (products
    separated by ``\\r\\n``, link in the 7th), the second in strides of five
    (products separated by commas, link in the 5th). In both, the product
    list is paragraph 2 of the stride and the version paragraph 3.
    """
    first_count = len(response.xpath("//div[@id='main_right']/span[1]/p"))
    for offset in range(0, first_count, 7):
        names = response.xpath(
            "//div[@id='main_right']/span[1]//p[%d]/text()" % (offset + 2)
        ).extract()[0]

        for product in names.split("\r\n"):
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_xpath(
                "version",
                "//div[@id='main_right']/span[1]//p[%d]/text()" % (offset + 3))
            loader.add_xpath(
                "url",
                "//div[@id='main_right']/span[1]//p[%d]/a/@href" % (offset + 7))
            loader.add_value("product", product)
            loader.add_value("vendor", self.name)
            yield loader.load_item()

    second_count = len(response.xpath("//div[@id='main_right']/span[2]/p"))
    for offset in range(0, second_count, 5):
        names = response.xpath(
            "//div[@id='main_right']/span[2]//p[%d]/text()" % (offset + 2)
        ).extract()[0]

        for product in names.split(","):
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_xpath(
                "version",
                "//div[@id='main_right']/span[2]//p[%d]/text()" % (offset + 3))
            loader.add_xpath(
                "url",
                "//div[@id='main_right']/span[2]//p[%d]/a/@href" % (offset + 5))
            loader.add_value("product", product)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse(self, response):
    """Emit an item for every "Firmware" link in the main container.

    Paragraphs and lists inside ``main-container`` are scanned. Each link
    whose href contains ``Firmware`` becomes an item, with the product
    derived from the text of the enclosing paragraph or list.
    """
    containers = response.xpath(
        "//div[@class='main-container']//p|//div[@class='main-container']//ul")

    for container in containers:
        container_text = container.xpath(".//text()").extract()

        for target in container.xpath(".//a/@href").extract():
            if "Firmware" not in target:
                continue

            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", target)
            loader.add_value(
                "product", FirmwareLoader.find_product(container_text))
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_download(self, response):
    """Emit items for download links found inside the ``auto`` block.

    A link qualifies when its href contains ``downloads`` or ``firmware``
    and does not end in ``.html``. Version and product are taken from
    ``response.meta``; the link text becomes the description.
    """
    for anchor in response.xpath("//div[@id='auto']//a"):
        target = anchor.xpath("./@href").extract()[0]
        label = anchor.xpath(".//text()").extract()[0]

        mentions_download = "downloads" in target or "firmware" in target
        if not mentions_download or target.endswith(".html"):
            continue

        loader = FirmwareLoader(item=FirmwareImage(), response=response)
        loader.add_value("version", response.meta["version"])
        loader.add_value("url", target)
        loader.add_value("description", label)
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse_link(self, response):
    """Emit an item for each row of the ``etdownloaditems`` table body.

    The version comes from the ``column-version`` cell, the URL and
    description from the link in the row's header cell, and the product
    from ``response.meta``.
    """
    # Some items will require captcha authentication and pass a cookie
    # named DownloadAuthorizationToken holding a long hexadecimal value.
    for row in response.xpath("//tbody[@class='etdownloaditems']//tr"):
        loader = FirmwareLoader(item=FirmwareImage(), response=response)

        version_text = row.xpath(
            ".//td[@class='column-version']//text()").extract()[0]
        loader.add_value("version", version_text.strip())

        target = row.xpath(".//th/a/@href").extract()[0]
        loader.add_value("url", target)

        label = row.xpath(".//th/a//text()").extract()[0]
        loader.add_value("description", label)

        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        yield loader.load_item()
def parse(self, response):
    """Crawl a link listing, loading ``.bin``/``.upg`` files as items.

    ``..`` and ``/`` are ignored. Any other link containing a slash is
    requested and parsed by this same callback.
    """
    for target in response.xpath("//a/@href").extract():
        if target in ("..", "/"):
            continue

        if target.endswith((".bin", ".upg")):
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", target)
            loader.add_value("vendor", self.name)
            yield loader.load_item()
            continue

        if "/" in target:
            yield Request(
                url=urlparse.urljoin(response.url, target),
                headers={"Referer": response.url},
                callback=self.parse)
def parse(self, response):
    """Expand the router selector or collect firmware links from a page.

    When the page has a ``router`` select box, every option value other
    than the empty one and ``allrouters`` is requested as ``?router=<value>``
    and parsed by this same callback. Otherwise the links of the search
    results cell (or, failing that, of the ``ghfbodycontent`` block) are
    scanned for ``download.verizon.net`` firmware URLs.
    """
    if response.xpath("//select[@id='router']"):
        options = response.xpath(
            "//select[@id='router']/option/@value").extract()
        for product in options:
            if not product or product == "allrouters":
                continue
            yield Request(
                url=urlparse.urljoin(
                    response.url, "?router=%s" % (product)),
                headers={"Referer": response.url},
                callback=self.parse)
        return

    # Both remaining page layouts are handled the same way; only the
    # container the links are read from differs.
    if response.xpath("//td[@id='search_main_content']"):
        anchors = response.xpath("//td[@id='search_main_content']//a")
    else:
        anchors = response.xpath("//div[@id='ghfbodycontent']//a")

    for anchor in anchors:
        href_nodes = anchor.xpath("./@href")
        if not href_nodes:
            continue

        target = href_nodes.extract()[0]
        label = anchor.xpath(".//text()").extract()

        if "download.verizon.net" in target and "firmware" in target:
            loader = FirmwareLoader(item=FirmwareImage(), response=response)
            loader.add_value("url", target)
            loader.add_value("description", label[0])
            loader.add_value("vendor", self.name)
            yield loader.load_item()
def parse_product(self, response):
    """Emit an item for each firmware row of the ``accordion-2`` table.

    Rows after the first are considered. A row qualifies when the text of
    the first link in its second cell contains "firmware" in any letter
    case. Date and URL are read relative to the row; the product comes
    from ``response.meta``.
    """
    rows = response.xpath("//div[@id='accordion-2']//tr[position() > 1]")

    for row in rows:
        label = row.xpath("./td[2]//a[1]/text()").extract()
        if "firmware" not in "".join(label).lower():
            continue

        loader = FirmwareLoader(item=FirmwareImage(), response=response,
                                selector=row, date_fmt=["%Y-%m-%d"])
        loader.add_xpath("date", "td[1]//text()")
        loader.add_value("description", label)
        loader.add_xpath("url", "td[2]//a[1]/@href")
        loader.add_value("product", response.meta["product"])
        loader.add_value("vendor", self.name)
        loader.add_value(
            "version", FirmwareLoader.find_version_period(label))
        yield loader.load_item()
def parse_download(self, response):
    """Emit an item for every firmware entry of a JSON download list.

    Only entries whose ``subFileType`` is ``"firmware"`` are loaded. The
    ``releaseDate`` is divided by 1000 before being converted with
    ``fromtimestamp`` and then formatted with the loader's own first
    ``date_fmt`` so the loader can parse it back.
    """
    # response.text replaces the deprecated body_as_unicode().
    for entry in json.loads(response.text):
        if entry["subFileType"] != "firmware":
            continue

        item = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%d/%m/%y"])

        released = datetime.datetime.fromtimestamp(
            int(entry["releaseDate"]) / 1000)

        item.add_value("version", entry["fileVersion"])
        item.add_value(
            "date", released.strftime(item.context.get("date_fmt")[0]))
        item.add_value("description", entry["fileName"])
        item.add_value("url", entry["downloadUrl"])
        item.add_value("product", response.meta["product"])
        item.add_value("vendor", self.vendor)
        yield item.load_item()
def parse_product(self, response):
    """Emit an item for every ``.npk``/``.lzb`` link on the page.

    Date and version are searched for in the text of the whole page. The
    product is the file name up to its last dash.
    """
    firmware_links = [
        href for href in response.xpath("//a/@href").extract()
        if href.endswith((".npk", ".lzb"))
    ]
    if not firmware_links:
        return

    # The page text is the same for every link, so extract it once rather
    # than once per link.
    text = response.xpath("//text()").extract()

    for href in firmware_links:
        basename = href.split("/")[-1]

        item = FirmwareLoader(
            item=FirmwareImage(), response=response, date_fmt=["%Y-%b-%d"])
        item.add_value("date", item.find_date(text))
        item.add_value("url", href)
        item.add_value("product", basename[0: basename.rfind("-")])
        item.add_value("vendor", self.name)
        item.add_value(
            "version", FirmwareLoader.find_version_period(text))
        yield item.load_item()
def parse_product(self, response):
    """Load the first ``.bin`` upload path found in the page body.

    The path is turned into an absolute phicomm.com URL. Product, date,
    version and description are taken from ``response.meta``. Pages
    without a matching path yield nothing.
    """
    import re

    # Debug output; single-argument print() behaves the same on
    # Python 2 and 3.
    print(response.text)

    # e.g. /cn/Uploads/files/20161024/K1_V22.4.2.15.bin
    paths = re.findall(r"(/cn/Uploads/files/.*?\.bin)", response.text)
    if not paths:
        return  # no firmware path on this page; nothing to load

    url = "http://www.phicomm.com/{}".format(paths[0])

    item = FirmwareLoader(item=FirmwareImage())
    item.add_value("url", url)
    item.add_value("product", response.meta['product'])
    item.add_value("date", response.meta['date'])
    item.add_value("version", response.meta['version'])
    item.add_value("vendor", self.vendor)
    item.add_value("description", response.meta['description'])
    yield item.load_item()
def parse(self, response):
    """Crawl a directory listing and load ``.gz`` images as firmware items.

    Links containing ``..`` are ignored and links ending in ``/`` are
    followed recursively. For ``.gz`` links not mentioning ``.iso`` the
    link text is stripped of its extensions and split on dashes. The
    version found among the pieces (plus any BETA/RC markers) becomes the
    version, and the remaining pieces, minus RELEASE markers, the product.
    """
    for link in response.xpath("//a"):
        texts = link.xpath(".//text()").extract()
        hrefs = link.xpath(".//@href").extract()
        if not texts or not hrefs:
            continue  # anchor without label or target

        text, href = texts[0], hrefs[0]

        if ".." in href:
            continue
        elif href.endswith('/'):
            yield Request(
                url=urlparse.urljoin(response.url, href),
                headers={"Referer": response.url},
                callback=self.parse)
        elif href.endswith(".gz") and ".iso" not in href:
            # strip off multiple file extensions
            basename = os.path.splitext(text)[0]
            while ".img" in basename or ".iso" in basename:
                stripped = os.path.splitext(basename)[0]
                if stripped == basename:
                    break  # nothing left to strip; avoid looping forever
                basename = stripped

            basename = basename.split("-")
            version = FirmwareLoader.find_version_period(basename)

            # attempt to parse filename and generate product/version
            # strings
            remove = [version] if version else []
            # Collected separately so that a missing base version does not
            # make the concatenation fail.
            version_parts = [version] if version else []

            for i, part in enumerate(basename):
                if "BETA" in part:
                    # The piece after BETA is glued to it when present.
                    follower = basename[i + 1] if i + 1 < len(basename) else ""
                    version_parts.append(part + follower)
                    remove.append(part)
                    if follower:
                        remove.append(follower)
                elif "RC" in part:
                    version_parts.append(part)
                    remove.append(part)
                elif "RELEASE" in part:
                    remove.append(part)

            basename = [x for x in basename if x not in remove]
            full_version = "-".join(version_parts) or None

            item = FirmwareLoader(
                item=FirmwareImage(), response=response,
                date_fmt=["%d-%b-%Y"])
            item.add_value("version", full_version)
            item.add_value("url", href)
            item.add_value("date", item.find_date(
                link.xpath("following::text()").extract()))
            item.add_value("product", "-".join(basename))
            item.add_value("vendor", self.name)
            yield item.load_item()