def parse(self, response):
    """Build the Calibre item from the release pane of the response.

    The first ``<h2>`` text found in the release pane supplies both the
    version (its second whitespace-separated token) and the release date
    (the text after the first ``[``, with ``]`` removed, parsed as
    ``%d %b, %Y``). The version is passed through ``self._check_version``
    before being embedded in the three download URLs.

    Returns:
        An ``AppMonitorItem`` with name, version, date, notes, id and a list
        of download URLs.
    """
    pane_html = response.xpath(
        '//div[@id="release-pane"]/div').extract_first()
    pane = lxml.html.fromstring(pane_html)
    heading = pane.xpath('//h2/text()')[0]

    version = heading.split()[1]
    raw_date = heading.split('[')[1].replace(']', '')
    datestr = datetime.strptime(raw_date, '%d %b, %Y').strftime('%Y-%m-%d')

    item = AppMonitorItem()
    item['name'] = 'Calibre'
    item['version'] = version
    item['date'] = datestr
    # Release notes: every <ul> that follows the first <h2> in the content div.
    note_fragments = response.xpath(
        '(//div[@id="content"]//h2)[1]/following-sibling::ul').extract()
    item['notes'] = ''.join(note_fragments)
    item['id'] = 'calibre'

    ver = self._check_version(version)
    release_dir = 'https://download.calibre-ebook.com/' + ver
    item['download_url'] = [
        release_dir + '/calibre-portable-installer-' + ver + '.exe',
        release_dir + '/calibre-' + ver + '.dmg',
        release_dir + '/calibre-' + ver + '-x86_64.txz',
    ]
    return item
def parse(self, response):
    """Build the Golang item from the "stable" section of the response.

    The version is the first space-separated token of the first ``<h2>`` in
    the div following ``h3#stable``, with ``go`` removed. Download links are
    looked up in that section's table by link text for four targets and
    prefixed with ``https://golang.org``.

    Returns:
        An ``AppMonitorItem`` whose ``download_url`` is a list of four URLs.
    """
    heading = response.xpath(
        '//h3[@id="stable"]/following-sibling::div[1]//h2[1]/text()').get()
    version = heading.split(' ')[0].replace('go', '')

    site = 'https://golang.org'
    href_query = Template(
        '//h3[@id="stable"]/following-sibling::div[1]//table//a[contains(text(), "$arch")]/@href'
    )
    targets = (
        'linux-amd64',
        'darwin-amd64',
        'windows-amd64.msi',
        'windows-amd64.zip',
    )
    down_urls = [
        site + response.xpath(href_query.substitute(arch=target)).get()
        for target in targets
    ]

    item = AppMonitorItem()
    item['name'] = 'Golang'
    item['version'] = version
    item['date'] = None
    item['notes'] = ''
    item['id'] = 'go'
    item['download_url'] = down_urls
    return item
def _parse_karaf(self, response):
    """Yield one item each for Karaf Runtime, Cellar, Cave and Decanter.

    Each version is read from the ``<span>`` inside the ``<h3>`` whose text
    contains the product name. The Runtime item takes its download URL from
    the "Binary Distribution" zip link on the page; the other three use
    fixed anchors on the Karaf download page.

    Yields:
        Four ``AppMonitorItem`` objects, in the order Runtime, Cellar, Cave,
        Decanter.
    """
    runtime_version = response.xpath(
        '//main//h3[contains(text(), "Karaf Runtime")]/span/text()').get()
    runtime = AppMonitorItem()
    runtime['name'] = 'Apache Karaf Runtime'
    runtime['version'] = runtime_version
    runtime['date'] = None
    runtime['notes'] = ''
    runtime['id'] = 'apache-karaf-runtime'
    runtime['download_url'] = response.xpath(
        '//main//h3[contains(text(), "Karaf Runtime")]//following-sibling::p[contains(text(), "Binary Distribution")]/a[contains(text(), "zip")]/@href').get()
    yield runtime

    # The remaining products differ only by name, so one loop covers them.
    for product in ('Cellar', 'Cave', 'Decanter'):
        slug = product.lower()
        sub_version = response.xpath(
            '//main//h3[contains(text(), "Karaf ' + product + '")]/span/text()').get()
        sub_item = AppMonitorItem()
        sub_item['name'] = 'Apache Karaf ' + product
        sub_item['version'] = sub_version
        sub_item['date'] = None
        sub_item['notes'] = ''
        sub_item['id'] = 'apache-karaf-' + slug
        sub_item['download_url'] = (
            'http://karaf.apache.org/download.html#' + slug + '-installation')
        yield sub_item
def _parse_maven(self, response):
    """Build the Apache Maven item from the response.

    The version is the last space-separated token of the first
    ``//main/section/h2`` text; the download URL is the href of the table
    link whose text contains ``bin.zip``.

    Returns:
        An ``AppMonitorItem`` for Apache Maven.
    """
    heading = response.xpath('//main/section/h2/text()').get()
    latest = heading.split(' ')[-1]

    item = AppMonitorItem()
    item['name'] = 'Apache Maven'
    item['version'] = latest
    item['date'] = None
    item['notes'] = ''
    item['id'] = 'apache-maven'
    bin_zip_href = response.xpath(
        '//main/section/section/table//a[contains(text(),"bin.zip")]/@href').get()
    item['download_url'] = bin_zip_href
    return item
def parse(self, response):
    """Yield the fman item built from the response.

    The version is the last space-separated token of the stripped text of
    the first main-content ``<p>`` that contains "Version". Download URL and
    notes are fixed strings.

    Yields:
        A single ``AppMonitorItem`` for fman.
    """
    version_line = response.xpath(
        '//div[@class="main-content"]//p[contains(text(), "Version")]/text()'
    ).get()
    tokens = version_line.strip().rsplit(' ')

    item = AppMonitorItem()
    item['name'] = 'fman'
    item['version'] = tokens[-1]
    item['date'] = ''
    item['id'] = 'fman'
    item['download_url'] = 'https://fman.io/download'
    item['notes'] = 'Changelog: https://fman.io/changelog'
    yield item
def parse_linux(self, response):
    """Build the WPS (Linux) item from the response.

    The version is the banner text taken verbatim; the download URL is the
    first link inside a ``div.box`` whose href contains ``amd64.deb``.

    Returns:
        An ``AppMonitorItem`` for WPS on Linux.
    """
    banner_text = response.xpath(
        '//div[@class="banner"]/p[@class="banner_txt"]/text()').get()

    item = AppMonitorItem()
    item['name'] = 'WPS(Linux)'
    item['version'] = banner_text
    item['date'] = None
    item['notes'] = ''
    item['id'] = 'wps-linux'
    deb_href = response.xpath(
        '//div[@class="box"]//a[contains(@href, "amd64.deb")]/@href').get()
    item['download_url'] = deb_href
    return item
def parse(self, response):
    """Build an item from a JSON release document.

    The application id is the third-from-last piece obtained when the
    request URL is split on its last three ``/`` separators. ``tag_name``,
    ``created_at`` and ``html_url`` are read from the JSON body.

    Returns:
        An ``AppMonitorItem`` whose name and id are both the derived app id.
    """
    url_parts = response.request.url.rsplit('/', 3)
    app_id = url_parts[-3]
    release = response.json()
    tag = release['tag_name']
    created = release['created_at']

    item = AppMonitorItem()
    item['name'] = app_id
    item['version'] = tag
    item['date'] = created
    item['notes'] = ''
    item['id'] = app_id
    item['download_url'] = release['html_url']
    return item
def _parse_felix(self, response):
    """Build the Apache Felix item from the downloads table in the response.

    All lookups are relative to the table cell whose text contains
    "Felix Framework Distribution": the next cell gives the version (first
    space-separated token) and the changelog link, and the cell after that
    gives the zip download link.

    Returns:
        An ``AppMonitorItem`` for Apache Felix.
    """
    row_label = '//div[@class="main"]//table[@class="table"]/tbody/tr/td[contains(text(), "Felix Framework Distribution")]'
    version_cell = row_label + '/following-sibling::td[1]'
    download_cell = row_label + '/following-sibling::td[2]'

    version_text = response.xpath(version_cell + '/text()').get()
    version = version_text.split(' ')[0]

    item = AppMonitorItem()
    item['name'] = 'Apache Felix'
    item['version'] = version
    item['date'] = None
    changelog_href = response.xpath(version_cell + '/a/@href').get()
    item['notes'] = 'Changelog: ' + changelog_href
    item['id'] = 'apache-felix'
    item['download_url'] = response.xpath(
        download_cell + '/a[contains(text(), "zip")]/@href').get()
    return item
def _parse_realvnc(self, response, name, app_id):
    """Build a RealVNC item from the ``.exe`` download link in the response.

    Args:
        response: Response containing a ``download-link`` div.
        name: Value stored as the item's ``name``.
        app_id: Value stored as the item's ``id``.

    Returns:
        An ``AppMonitorItem`` whose version is the second-to-last
        ``-``-separated piece of the link href and whose download URL is that
        href prefixed with ``https://www.realvnc.com``.
    """
    exe_href = response.xpath(
        '//div[contains(@id, "download-link")]/a[contains(@href, ".exe")]/@href'
    ).get()
    href_pieces = exe_href.split('-')
    full_url = 'https://www.realvnc.com' + exe_href

    item = AppMonitorItem()
    item['name'] = name
    item['version'] = href_pieces[-2]
    item['date'] = None
    item['notes'] = ''
    item['id'] = app_id
    item['download_url'] = full_url
    return item
def parse(self, response):
    """Build the MongoDB "current release" item from the response.

    The version is the first whitespace-separated token of the ``<option>``
    whose text ends with "current release)".

    Returns:
        An ``AppMonitorItem`` for MongoDB with a fixed download URL.
    """
    # Raw string: the regex escapes inside the XPath are not valid Python
    # string escapes, so a plain literal triggers an invalid-escape warning.
    # The runtime value of the expression is unchanged.
    option_text = response.xpath(
        r'//main//select/option[text()[re:test(.,".*current\srelease\)$")]]/text()'
    ).get()
    version = option_text.split()[0]

    item = AppMonitorItem()
    item['name'] = 'MongoDB current release'
    item['version'] = version
    item['date'] = ''
    item['notes'] = ''
    item['id'] = 'mongodb'
    item['download_url'] = 'https://www.mongodb.org/dl/win32/'
    return item
def parse(self, response):
    """Build the Artifactory OSS item from the response.

    The version is the last path segment of the response URL. The date is
    the text after "Updated in" inside ``#versionPublishedDate``, parsed as
    ``%b %d, %Y``. The download URL is the ``nodeDetails`` link href prefixed
    with ``https://bintray.com``.

    Returns:
        An ``AppMonitorItem`` for jfrog-artifactory-oss.
    """
    version = response.url.rsplit('/', 1)[-1]

    published = response.xpath(
        '//div[@id="versionPublishedDate"]/text()').get()
    date_text = published.split('Updated in', 1)[1].strip()
    datestr = datetime.strptime(date_text, '%b %d, %Y').strftime('%Y-%m-%d')

    item = AppMonitorItem()
    item['name'] = 'jfrog-artifactory-oss'
    item['version'] = version
    item['date'] = datestr
    item['notes'] = ''
    item['id'] = 'artifactory'
    node_href = response.xpath(
        '//div[@id="main-content"]//div[@class="nodeDetails"]/a/@href'
    ).get()
    item['download_url'] = 'https://bintray.com' + node_href
    return item
def parse(self, response):
    """Build the Allway Sync item from the response.

    The version is the ``<strong>`` text inside a panel body; the download
    URLs are the hrefs of the primary and secondary desktop download links,
    in that order.

    Returns:
        An ``AppMonitorItem`` whose ``download_url`` is a two-element list.
    """
    version = response.xpath(
        '//section//div[@class="panel-body"]/p/strong/text()').get()
    down_urls = [
        response.xpath('//a[@id="desktop-download-primary"]/@href').get(),
        response.xpath('//a[@id="desktop-download-secondary"]/@href').get(),
    ]

    item = AppMonitorItem()
    item['name'] = 'Allway Sync'
    item['version'] = version
    item['date'] = None
    item['notes'] = ''
    item['id'] = 'allwaysync'
    item['download_url'] = down_urls
    return item
def parse_mac(self, response):
    """Build the WPS (Mac) item from the response.

    The banner text is split on ``/``: the first piece is the version and
    the second is a ``%Y.%m.%d`` date. The download URL is the ``data-href``
    of the ``<a>`` preceding the banner text paragraph.

    Returns:
        An ``AppMonitorItem`` for WPS on Mac.
    """
    banner_text = response.xpath(
        '//div[@class="banner"]/p[@class="banner_txt"]/text()').get()
    pieces = banner_text.split('/')
    version = pieces[0]
    datestr = datetime.strptime(pieces[1], '%Y.%m.%d').strftime('%Y-%m-%d')
    down_url = response.xpath(
        '//div[@class="banner"]/p[@class="banner_txt"]/preceding-sibling::a/@data-href'
    ).get()

    item = AppMonitorItem()
    item['name'] = 'WPS(MAC)'
    item['version'] = version
    item['date'] = datestr
    item['notes'] = ''
    item['id'] = 'wps-mac'
    item['download_url'] = down_url
    return item
def parse(self, response):
    """Build an item from a page with an ``<h1>`` title and "Published:" line.

    The app id is the last path segment of the request URL. The version is
    the last space-separated token of the first ``<h1>`` under ``<main>``.
    The date is the text after ": " in the paragraph containing
    "Published: ", parsed as ``%B %d, %Y``.

    Returns:
        An ``AppMonitorItem`` whose name and id are both the app id and whose
        download URL is the first link with ``data-linktype="external"``.
    """
    app_id = response.request.url.rsplit('/', 3)[-1]

    title = response.xpath('//main/h1')[0].xpath('text()').get()
    version = title.split(' ')[-1]

    published_line = response.xpath(
        '//main//p[contains(text(), "Published: ")]/text()').get()
    published = datetime.strptime(
        published_line.split(': ')[-1], '%B %d, %Y')
    date = published.strftime('%Y-%m-%d')

    down_url = response.xpath(
        '//main//a[@data-linktype="external"]/@href').get()

    item = AppMonitorItem()
    item['name'] = app_id
    item['version'] = version
    item['date'] = date
    item['notes'] = ''
    item['id'] = app_id
    item['download_url'] = down_url
    return item
def _parse_tomcat(self, ver_no, response):
    """Build the Apache Tomcat item for one major version.

    Args:
        ver_no: Major version; converted with ``str`` and used in the item
            name, the item id and the heading lookup.
        response: Response containing the ``div#content`` block.

    Returns:
        An ``AppMonitorItem`` whose version is the text of the ``<h3>``
        containing ``"<ver_no>."`` and whose ``download_url`` lists the Core
        zip, documentation and Deployer zip links, in that order.
    """
    major = str(ver_no)
    version = response.xpath(
        '//main//div[@id="content"]/h3[contains(text(), "' + major + '.")]/text()').get()

    item = AppMonitorItem()
    item['name'] = 'Apache Tomcat ' + major
    item['version'] = version
    item['date'] = None
    item['notes'] = ''
    item['id'] = 'apache-tomcat' + major

    link_queries = (
        '//main//div[@id="content"]//li[contains(text(), "Core")]/ul/li/a[text()[re:test(., "^zip$")]]/@href',
        '//main//div[@id="content"]//li[contains(text(), "documentation")]/ul/li/a/@href',
        '//main//div[@id="content"]//li[contains(text(), "Deployer")]/ul/li/a[text()[re:test(., "^zip$")]]/@href',
    )
    item['download_url'] = [
        response.xpath(query).get() for query in link_queries
    ]
    return item
def parse(self, response):
    """Build an item from a NirSoft utility page.

    The title is the text of the second ``<td>`` in the ``utilcaption``
    table. The version is the full ``v<digits-and-dots>`` match within it
    (including the leading ``v``); the app id is the stripped title text
    before that match. Relative download hrefs are prefixed with the NirSoft
    utils base URL.

    Returns:
        An ``AppMonitorItem`` whose name and id are the app id and whose
        ``download_url`` is a list of absolute URLs.
    """
    caption_cells = response.xpath('//table[@class="utilcaption"]//td')
    title = caption_cells[1].xpath('text()').get()
    version = re.search(r'v([\d.]+)', title).group(0)
    app_id = title.split(version)[0].strip()

    anchors = response.xpath('//a[@class="downloadline"]')
    hrefs = (anchor.xpath('@href').get() for anchor in anchors)
    down_urls = [
        href if href.startswith('http')
        else 'http://www.nirsoft.net/utils/' + href
        for href in hrefs
    ]

    item = AppMonitorItem()
    item['name'] = app_id
    item['version'] = version
    item['date'] = None
    item['notes'] = ''
    item['id'] = app_id
    item['download_url'] = down_urls
    return item
def _parse_item(self, app_id, tag, response):
    """Yield the SourceTree item for one platform.

    Args:
        app_id: Value stored as the item's ``id``.
        tag: ``'Windows'`` selects the Windows link; any other value selects
            the Mac link. Also appended to the item name.
        response: Response containing the ``download-area`` div.

    Yields:
        A single ``AppMonitorItem`` whose version is the download file name
        with its extension and product prefix removed.
    """
    # Only the link label, file extension and file-name prefix differ
    # between the two platforms.
    if tag == 'Windows':
        label, extension, prefix = 'Windows', '.exe', 'SourceTreeSetup-'
    else:
        label, extension, prefix = 'Mac', '.zip', 'Sourcetree_'

    dwn_url = response.xpath(
        '//div[@id="download-area"]//a[contains(text(), "' + label + '")]/@href'
    ).get()
    file_name = dwn_url.rsplit('/')[-1]
    version = file_name.replace(extension, '').replace(prefix, '')

    item = AppMonitorItem()
    item['name'] = 'SourceTree ' + tag
    item['version'] = version
    item['date'] = None
    item['notes'] = ''
    item['id'] = app_id
    item['download_url'] = dwn_url
    yield item
def parse_node(self, response):
    """Build the Node.js item from a directory-listing style response.

    The x64 MSI link supplies the version (second ``-``-separated piece of
    its text) and the download href; the text node following the link
    supplies the date (its first whitespace-separated token). The release
    line used in the name and id is derived from the second-to-last path
    segment of the response URL.

    URLs containing ``latest-v12`` use the V12 changelog and dist directory;
    every other URL uses the V10 ones.

    Returns:
        An ``AppMonitorItem`` for the Node.js release line.
    """
    # Raw string: the escaped dot in the regex is not a valid Python string
    # escape, so a plain literal triggers an invalid-escape warning. The
    # selector is defined once instead of being repeated per lookup.
    msi_link = r'//a[text()[re:test(.,"^node.*x64\.msi$")]]'

    file_name = response.xpath(msi_link + '/text()').extract_first()
    version = file_name.split('-')[1]

    trailing_text = response.xpath(
        msi_link + '/following-sibling::text()').extract_first()
    date = trailing_text.strip().split()[0]

    release_line = response.url.rsplit('/', 2)[-2].split('-')[1]

    # One branch decides both the changelog file and the dist directory.
    if 'latest-v12' in response.url:
        changelog_file, dist_dir = 'CHANGELOG_V12.md', 'latest-v12.x'
    else:
        changelog_file, dist_dir = 'CHANGELOG_V10.md', 'latest-v10.x'

    item = AppMonitorItem()
    item['name'] = 'Node.js ' + release_line
    item['version'] = version
    item['date'] = date
    item['notes'] = (
        '<a href="https://github.com/nodejs/node/blob/master/doc/changelogs/'
        + changelog_file + '#' + version + '">Changelog</a>')
    item['id'] = 'node-' + release_line
    item['download_url'] = (
        'https://nodejs.org/dist/' + dist_dir + '/'
        + response.xpath(msi_link + '/@href').get())
    return item
def parse(self, response):
    """Build the Google Chrome item from a JSON version feed.

    The first entry with ``os == 'win64'`` is selected, then the first of
    its ``versions`` with ``channel == 'stable'``. ``current_reldate`` is
    parsed as ``%m/%d/%y``; the notes link points at the Chromium log
    between ``previous_version`` and ``current_version``.

    Returns:
        An ``AppMonitorItem`` for the stable win64 Chrome release.
    """
    feed = json.loads(response.text)
    win64_entries = [entry for entry in feed if entry['os'] == 'win64']
    candidates = win64_entries[0]['versions']
    stable = [v for v in candidates if v['channel'] == 'stable'][0]

    version = str(stable['current_version'])
    released = datetime.strptime(str(stable['current_reldate']), '%m/%d/%y')
    datestr = released.strftime('%Y-%m-%d')
    previous_version = str(stable['previous_version'])

    item = AppMonitorItem()
    item['name'] = 'google-chrome-browser'
    item['version'] = version
    item['date'] = datestr
    item['notes'] = (
        '<a href="https://chromium.googlesource.com/chromium/src/+log/{}..{}?pretty=fuller&n=10000">Changelog</a>'
        .format(previous_version, version))
    item['id'] = 'chrome'
    item['download_url'] = (
        'https://www.google.com/intl/en/chrome/browser/desktop/index.html?standalone=1')
    return item
def parse(self, response):
    """Build the VirtualBox item from the response.

    The version is the first space-separated token of the stripped text of
    the ``<h3>`` whose id contains ``platformpackages``. The download list
    holds the "Windows hosts" and "OS X hosts" link hrefs followed by a fixed
    Linux downloads page URL.

    Returns:
        An ``AppMonitorItem`` whose ``download_url`` is a three-element list.
    """
    heading = response.xpath(
        '//h3[contains(@id, "platformpackages")]/text()').get()
    version = heading.strip().split(' ')[0]

    item = AppMonitorItem()
    item['name'] = 'VirtualBox'
    item['version'] = version
    item['date'] = None
    item['notes'] = ''
    item['id'] = 'virtualbox'

    windows_href = response.xpath(
        '//a[@class="ext-link"][contains(text(), "Windows hosts")]/@href'
    ).get()
    osx_href = response.xpath(
        '//a[@class="ext-link"][contains(text(), "OS X hosts")]/@href'
    ).get()
    item['download_url'] = [
        windows_href,
        osx_href,
        'https://www.virtualbox.org/wiki/Linux_Downloads',
    ]
    return item
def parse(self, response):
    """Build the Notepad++ item from a release page.

    The first ``<h1>`` text is split on single spaces: the second token is
    the name and the third is the version. The date is the stripped text
    after the first ``:`` in the "Release Date" paragraph. Notes are the
    concatenated ``<ol>`` elements under ``div#main``.

    Returns:
        An ``AppMonitorItem`` whose download URL is the x64 zip link href
        prefixed with ``https://notepad-plus-plus.org``.
    """
    title_tokens = response.xpath('//h1/text()').get().split(' ')
    name = title_tokens[1]
    version = title_tokens[2]

    item = AppMonitorItem()
    item['name'] = name
    item['version'] = version

    # Raw string: the regex whitespace escape is not a valid Python string
    # escape, so a plain literal triggers an invalid-escape warning. The
    # runtime value of the expression is unchanged.
    release_line = response.xpath(
        r'//div[@id="main"]/p[text()[re:test(., "^Release\sDate.*")]]/text()'
    ).get()
    item['date'] = release_line.split(':')[1].strip()

    item['notes'] = ''.join(
        response.xpath('//div[@id="main"]/div/ol').extract())
    item['id'] = 'npp'
    item['download_url'] = 'https://notepad-plus-plus.org' + response.xpath(
        '//div[@id="main"]//a[text()[re:test(., "^Notepad.*zip.*x64$")]]/@href'
    ).get()
    return item