def downloadApk(self, avi, isBeta=False):
    """
    downloadApk(avi): Download avi.download_src to an APK file named
    "<name>-<realver>.apk" (a '.beta' suffix is stripped from the name,
    spaces in the version are replaced by underscores).

    Returns the file name on success ('beta:'-prefixed when isBeta is
    True), or None when the file already exists or the name is invalid.
    """
    apkname = '{0}-{1}.apk'.format(avi.name.replace('.beta', ''),
                                   avi.realver.replace(' ', '_'))
    logging.info('Downloading "{0}" from: {1}'.format(apkname, avi.download_src))
    try:
        # Skip the download when the APK already exists in any of the
        # locations we know about (cwd, ./apkcrawler/, ../apkcrawler/).
        for place, note in ((apkname, ''),
                            (os.path.join('.', 'apkcrawler', apkname), ' (in ./apkcrawler/)'),
                            (os.path.join('..', 'apkcrawler', apkname), ' (in ../apkcrawler/)')):
            if os.path.exists(place):
                logging.info('Downloaded APK already exists{0}.'.format(note))
                return

        # Open the url
        session = requests.Session()
        session.proxies = Debug.getProxy()
        # plazza blocks fetching it at one go, we need to stream it in chunks
        r = session.get(avi.download_src, stream=True)
        with open(apkname, 'wb') as local_file:
            for chunk in r.iter_content(1024):
                local_file.write(chunk)

        logging.debug(('beta:' if isBeta else 'reg :') + apkname)
        return (('beta:' if isBeta else '') + apkname)
    except OSError:
        logging.exception('!!! Filename is not valid: "{0}"'.format(apkname))
def downloadApk(url, package, vercode):
    """
    downloadApk(url, package, vercode): Download the given URL to an APK
    file named "<package>-<vercode>.apk".

    Prints the file name (no newline) on success; skips the download when
    the file already exists in a known location.
    """
    apkname = '{0}-{1}.apk'.format(package, vercode)
    logging.info('Downloading "{0}" from: {1}'.format(apkname, url))
    try:
        # Skip the download when the APK already exists in any of the
        # locations we know about (cwd, ./apkcrawler/, ../apkcrawler/).
        for place, note in ((apkname, ''),
                            (os.path.join('.', 'apkcrawler', apkname), ' (in ./apkcrawler/)'),
                            (os.path.join('..', 'apkcrawler', apkname), ' (in ../apkcrawler/)')):
            if os.path.exists(place):
                logging.info('Downloaded APK already exists{0}.'.format(note))
                return

        # Open the url; stream the body in chunks rather than in one go.
        session = requests.Session()
        session.proxies = Debug.getProxy()
        r = session.get(url, stream=True)
        with open(apkname, 'wb') as local_file:
            for chunk in r.iter_content(1024):
                local_file.write(chunk)

        print('{0} '.format(apkname)),
        sys.stdout.flush()
    except OSError:
        logging.exception('!!! Filename is not valid: "{0}"'.format(apkname))
def downloadApk(url, package, vername, vercode, minSdk):
    """
    downloadApk(url, package, vername, vercode, minSdk): Download the given
    URL to "<package>_<vername>-<vercode>_minAPI<minSdk>.apk" (spaces in the
    version name become underscores).

    Prints the file name (no newline) on success; skips the download when
    the file already exists in a known location.
    """
    apkname = "{0}_{1}-{2}_minAPI{3}.apk".format(package, vername.replace(" ", "_"), vercode, minSdk)
    logging.info('Downloading "{0}" from: {1}'.format(apkname, url))
    try:
        # Skip the download when the APK already exists in any of the
        # locations we know about (cwd, ./apkcrawler/, ../apkcrawler/).
        for place, note in ((apkname, ""),
                            (os.path.join(".", "apkcrawler", apkname), " (in ./apkcrawler/)"),
                            (os.path.join("..", "apkcrawler", apkname), " (in ../apkcrawler/)")):
            if os.path.exists(place):
                logging.info("Downloaded APK already exists{0}.".format(note))
                return

        # Open the url
        session = requests.Session()
        session.proxies = Debug.getProxy()
        r = session.get(url)
        with open(apkname, "wb") as local_file:
            local_file.write(r.content)

        print("{0} ".format(apkname)),
        sys.stdout.flush()
    except OSError:
        logging.exception('!!! Filename is not valid: "{0}"'.format(apkname))
def downloadApk(self, avi, isBeta=False):
    """
    downloadApk(avi): Download avi.download_src to an APK file named
    "<name>-<realver>.apk" (a '.beta' suffix is stripped from the name,
    spaces in the version are replaced by underscores).

    Returns the file name on success ('beta:'-prefixed when isBeta is
    True), or None when the file already exists or the name is invalid.
    """
    apkname = '{0}-{1}.apk'.format(avi.name.replace('.beta', ''),
                                   avi.realver.replace(' ', '_'))
    logging.info('Downloading "{0}" from: {1}'.format(apkname, avi.download_src))
    try:
        # Skip the download when the APK already exists in any of the
        # locations we know about (cwd, ./apkcrawler/, ../apkcrawler/).
        for place, note in ((apkname, ''),
                            (os.path.join('.', 'apkcrawler', apkname), ' (in ./apkcrawler/)'),
                            (os.path.join('..', 'apkcrawler', apkname), ' (in ../apkcrawler/)')):
            if os.path.exists(place):
                logging.info('Downloaded APK already exists{0}.'.format(note))
                return

        # Open the url
        session = requests.Session()
        session.proxies = Debug.getProxy()
        # they don't like scripted downloads and then offer their own app instead
        user_agent = {'User-agent': 'Mozilla/5.0'}
        r = session.get(avi.download_src, headers=user_agent)
        with open(apkname, 'wb') as local_file:
            local_file.write(r.content)

        logging.debug(('beta:' if isBeta else 'reg :') + apkname)
        return (('beta:' if isBeta else '') + apkname)
    except OSError:
        logging.exception('!!! Filename is not valid: "{0}"'.format(apkname))
def downloadApk(url, package):
    """
    downloadApk(url, package): Download the given URL to an APK file named
    "<package>.apk".

    Prints the file name (no newline) on success; skips the download when
    the file already exists in a known location.
    """
    apkname = '{0}.apk'.format(package)
    logging.info('Downloading "{0}" from: {1}'.format(apkname, url))
    try:
        # Skip the download when the APK already exists in any of the
        # locations we know about (cwd, ./apkcrawler/, ../apkcrawler/).
        for place, note in ((apkname, ''),
                            (os.path.join('.', 'apkcrawler', apkname), ' (in ./apkcrawler/)'),
                            (os.path.join('..', 'apkcrawler', apkname), ' (in ../apkcrawler/)')):
            if os.path.exists(place):
                logging.info('Downloaded APK already exists{0}.'.format(note))
                return

        # Open the url
        session = requests.Session()
        session.proxies = Debug.getProxy()
        # they don't like scripted downloads and then offer their own app instead
        user_agent = {'User-agent': 'Mozilla/5.0'}
        r = session.get(url, headers=user_agent)
        with open(apkname, 'wb') as local_file:
            local_file.write(r.content)

        print('{0} '.format(apkname)),
        sys.stdout.flush()
    except OSError:
        logging.exception('!!! Filename is not valid: "{0}"'.format(apkname))
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): scrape the apk-dl.com page for apkid, build an
    ApkVersionInfo per listed version, and download the first one the
    report says is needed (returning downloadApk's result).
    """
    logging.info('Checking app: {0}'.format(apkid))
    url = 'http://apk-dl.com/' + apkid
    session = requests.Session()
    session.proxies = Debug.getProxy()
    logging.debug('Requesting: ' + url)
    resp = session.get(url)
    # Normalize to plain ASCII bytes so parsing is not tripped up by exotic characters.
    html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
    try:
        dom = BeautifulSoup(html, 'html5lib')
        apklist = dom.findAll('ul', {'class': 'apks dlist'})[0]  # IndexError here => site does not list this app
        apks = apklist.findAll('div', {'class': 'details'})
        for apk in apks:
            items = apk.findAll('div')
            dApk = {}
            # Each detail <div> reads like "Key:Value"; collect them into dApk.
            for item in items:
                itext = '{0}'.format(item.get_text().encode('ascii', 'ignore'))
                itext = re.sub('\s', '', itext)  # drop ALL whitespace so the split below is stable
                itextsp = itext.split(':', 1)
                if len(itextsp) == 2:
                    dApk[str(itextsp[0])] = str(itextsp[1])
            apkurl = apk.find('a', {'class': 'btn btn-success'})
            if apkurl:
                dApk['url'] = 'http:' + apkurl['href']  # hrefs on the page are protocol-relative
                Debug.printDictionary(dApk)
                if 'Version' in dApk and 'RequiresAndroid' in dApk:
                    # 'RequiresAndroid' ends in "API:<n>)" -> extract the sdk level
                    (trash, sdk) = dApk['RequiresAndroid'].split('API:', 1)
                    sdk = sdk[0:-1]  # drop the trailing ')'
                    # 'Version' looks like "<ver>(Code:<vercode>)" -> split name and code
                    (ver, vercode) = dApk['Version'].split('(Code:', 1)
                    ver = ver.split('(', 1)[0].strip()
                    vercode = vercode[0:-1].strip()  # drop the trailing ')'
                    avi = ApkVersionInfo(name=apkid,
                                         sdk=sdk,
                                         ver=ver,
                                         vercode=vercode,
                                         download_src=dApk['url']
                                         )
                    if self.report.isThisApkNeeded(avi):
                        return self.downloadApk(avi)
    except IndexError:
        logging.info('{0} not supported by apk-dl.com ...'.format(apkid))
    except:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): look up apkid's uptodown.com 'old versions' page,
    collect up to five version candidates, and download each one the
    report says is needed. Returns the list of downloaded file names.
    """
    logging.info('Checking app: {0}'.format(apkid))
    avis = []       # ApkVersionInfo candidates scraped from the site
    filenames = []  # names returned by downloadApk for versions we fetched
    try:
        upToDownName = allUpToDownNames[apkid]  # KeyError => app not in the site's name map
        appurl = 'http://' + upToDownName + '.en.uptodown.com/android/old'
        session = requests.Session()
        session.proxies = Debug.getProxy()
        logging.debug('Requesting: ' + appurl)
        try:
            appresp = session.get(appurl)
            # Normalize to plain ASCII bytes before parsing.
            apphtml = unicodedata.normalize('NFKD', appresp.text).encode('ascii', 'ignore')
            appdom = BeautifulSoup(apphtml, 'html5lib')
            latestver = appdom.find('span', {'itemprop': 'softwareVersion'})
            if latestver:  # sometimes there is only 1 version and no old versions, and you get the latest-version page of the app instead of the overview of old versions
                avis.append(ApkVersionInfo(name=apkid,
                                           ver=(latestver.contents[0].lstrip('v').strip().encode("ascii") if latestver.contents else ''),  # sometimes there is no versionnumber, or they set a v in front of the versionName; it presents unicode for some reason
                                           scrape_src='http://' + upToDownName + '.en.uptodown.com/android/download'))
            else:
                appversions = appdom.findAll('section', {'class': 'container'})
                for apk in appversions[0:5]:  # limit ourself to only the first 5 results; the chance that there are updates beyond that point is smaller than the chance of having errors in the versionname
                    apkurl = apk.find('a')['href']
                    apkver = apk.find('span', {'class': 'app_card_version'}).contents
                    avis.append(ApkVersionInfo(name=apkid,
                                               ver=(apkver[0].lstrip('v').strip().encode("ascii") if apkver else ''),  # sometimes there is no versionnumber, or they set a v in front of the versionName; it presents unicode for some reason
                                               scrape_src=apkurl))
                # END: for appversions
            # END: if lastestver
            # Second pass: fetch the per-version download page only for versions we actually need.
            for avi in avis:
                if self.report.isThisApkNeeded(avi):
                    logging.debug('Requesting: ' + avi.scrape_src)
                    try:
                        downloadresp = session.get(avi.scrape_src)
                        downloadhtml = unicodedata.normalize('NFKD', downloadresp.text).encode('ascii', 'ignore')
                        downloaddom = BeautifulSoup(downloadhtml, 'html5lib')
                        avi.download_src = downloaddom.find('iframe', {'id': 'iframe_download'})['src']  # note that this url will still result in a redirect 302
                        filenames.append(self.downloadApk(avi))
                    except:
                        logging.exception('!!! Error parsing html from: "{0}"'.format(avi.scrape_src))
                # END: if isThisApkNeeded
            # END: for avis
        except:
            logging.exception('!!! Error parsing html from: "{0}"'.format(appurl))
    except KeyError:
        logging.info('{0} not in uptodown.com dictionary'.format(apkid))
    return filenames
def downloadApk(apkInfo):
    """
    downloadApk(apkInfo): Download apkInfo['path'] to an APK named
    "<package>_<vername>-<vercode>_minAPI<minSdk>[(<cpu>)][(<dpi>dpi)].apk".

    apkInfo is a dict with keys 'path', 'package', 'vername', 'vercode',
    'minSdk' and optional 'cpu' / 'screenCompat'. Prints the file name
    (no newline) on success; skips the download when the file already
    exists in a known location.
    """
    url = apkInfo['path']
    cpu = apkInfo.get('cpu', '')
    if cpu != '':
        cpu = '({0})'.format(cpu)
    # Default '(nodpi)' is embedded verbatim; anything else is condensed via doDpiStuff.
    dpi = apkInfo.get('screenCompat', '(nodpi)')
    if dpi != '(nodpi)':
        dpi = '({0}dpi)'.format(doDpiStuff(dpi, ','))
    apkname = '{0}_{1}-{2}_minAPI{3}{4}{5}.apk'.format(apkInfo['package'],
                                                       apkInfo['vername'].replace(' ', '_'),
                                                       apkInfo['vercode'],
                                                       apkInfo['minSdk'],
                                                       cpu, dpi)
    logging.info('Downloading "{0}" from: {1}'.format(apkname, url))
    try:
        # Skip the download when the APK already exists in any of the
        # locations we know about (cwd, ./apkcrawler/, ../apkcrawler/).
        for place, note in ((apkname, ''),
                            (os.path.join('.', 'apkcrawler', apkname), ' (in ./apkcrawler/)'),
                            (os.path.join('..', 'apkcrawler', apkname), ' (in ../apkcrawler/)')):
            if os.path.exists(place):
                logging.info('Downloaded APK already exists{0}.'.format(note))
                return

        # Open the url
        session = requests.Session()
        session.proxies = Debug.getProxy()
        r = session.get(url)
        with open(apkname, 'wb') as local_file:
            local_file.write(r.content)

        print('{0} '.format(apkname)),
        sys.stdout.flush()
    except OSError:
        logging.exception('!!! Filename is not valid: "{0}"'.format(apkname))
def checkOneApp(apkid):
    """
    checkOneApp(apkid): check plazza.ir for the latest version of apkid
    and download it when found. Old-version fetching is stubbed out
    (requires a VIP account).
    """
    logging.info('Checking app: {0}'.format(apkid))
    html_name = '{0}.html'.format(apkid)
    url = 'http://www.plazza.ir/app/' + apkid + '?hl=en'
    # BUG FIX: the session is needed below for session.head() even when the
    # page comes from the debug cache, so create it unconditionally
    # (previously it was only created on a cache miss -> NameError on a hit).
    session = requests.Session()
    session.proxies = Debug.getProxy()
    html = Debug.readFromFile(html_name)
    if html == '':
        logging.debug('Requesting: ' + url)
        resp = session.get(url)
        html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
        Debug.writeToFile(html_name, html, resp.encoding)
    try:
        dom = BeautifulSoup(html, 'html5lib')
        latestapk = dom.findAll('a', {'itemprop': 'downloadUrl'})[0]  # IndexError here => not supported
        appid = re.search('(^\/dl\/)([0-9]+)(\/1$)', latestapk['href']).group(2)
        # Follow the redirect chain with HEAD to learn the final download URL,
        # which embeds the version number before '.apk'.
        latesturl = session.head('http://www.plazza.ir' + latestapk['href'], allow_redirects=True).url
        latestver = re.search('(_)([0-9]+)(\.apk)$', latesturl).group(2)
        # We still miss versioncode comparison here
        downloadApk(latesturl, apkid, latestver)
        # Fetching of older versions is not completed, because it requires VIP accounts
        #olderapks = dom.findAll('div', {'style': 'direction: rtl'})[0].findAll('a', {'target': '_blank'})
        #for apk in olderapks:
        #    apkver = re.search('(\/)([0-9]+)(\?.*$|$)', apk['href']).group(2)  #number is either end of string or there can be an ? for extra GET parameters
        #    apkurl = session.head('http://www.plazza.ir/dl_version/' + appid + '/' + apkver + '/1', allow_redirects=True).url
    except AttributeError:
        logging.info('{0} has an invalid version in the download URL ...'.format(apkid))
    except IndexError:
        logging.info('{0} not supported by plazza.ir ...'.format(apkid))
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): query plazza.ir for the latest version of apkid
    and download it if the report says it is needed (returning
    downloadApk's result). Old-version fetching is stubbed out
    (requires a VIP account).
    """
    logging.info('Checking app: {0}'.format(apkid))
    url = 'http://www.plazza.ir/app/' + apkid + '?hl=en'
    session = requests.Session()
    session.proxies = Debug.getProxy()
    logging.debug('Requesting: ' + url)
    try:
        resp = session.get(url, allow_redirects=False)  # we get a 302 if application is not found
        if resp.status_code == httplib.OK:  # 200 => app page exists
            # Normalize to plain ASCII bytes before parsing.
            html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
            try:
                dom = BeautifulSoup(html, 'html5lib')
                latesthref = dom.find('a', {'itemprop': 'downloadUrl'})['href']
                latestver = dom.find('div', {'itemprop': 'softwareVersion'}).contents[0].strip()
                appid = re.search('(^\/dl\/)([0-9]+)(\/1$)', latesthref).group(2)
                # Follow the redirect chain with HEAD to learn the final download URL.
                latesturl = session.head('http://www.plazza.ir' + latesthref, allow_redirects=True).url
                #latestvercode = re.search('(_)([0-9]+)(\.apk)$', latesturl).group(2)  #apparently this is NOT a (reliable?) versioncode
                avi = ApkVersionInfo(name=apkid,
                                     ver=latestver,
                                     #vercode=latestvercode,
                                     download_src=latesturl
                                     )
                if self.report.isThisApkNeeded(avi):
                    return self.downloadApk(avi)
                #Fetching of older versions is not completed, because it requires VIP accounts
                #olderapks = dom.find('div', {'style': 'direction: rtl'}).findAll('a', {'target': '_blank'})
                #for apk in olderapks:
                #    apkver = re.search('(\/)([0-9]+)(\?.*$|$)', apk['href']).group(2)  #number is either end of string or there can be an ? for extra GET parameters
                #    apkurl = session.head('http://www.plazza.ir/dl_version/' + appid + '/' + apkver + '/1', allow_redirects=True).url
            except:
                logging.exception('!!! Error parsing html from: "{0}"'.format(url))
        else:
            logging.info('{0} not available on plazza.ir'.format(apkid))
    except:
        logging.exception('Connection error to plazza.ir when checking {0} at {1}'.format(apkid, url))
def getUrlFromRedirect(self, apkname, url):
    """
    getUrlFromRedirect(apkname, url): fetch the redirect page at url and
    return the real download link found in it, or '' on failure.

    apkname is currently unused here; kept for interface compatibility
    with the sibling implementation that uses it for debug caching.
    """
    link = ''
    session = requests.Session()
    session.proxies = Debug.getProxy()
    logging.debug('Requesting2: ' + url)
    resp = session.get(url)
    # Normalize to plain ASCII bytes before parsing.
    html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
    try:
        dom = BeautifulSoup(html, 'html5lib')
        # The download link is the parent <a> of the cloud-download glyph icon.
        link = dom.find('span', {'class': 'glyphicon glyphicon-cloud-download'}).parent['href']
    except Exception:  # was a bare except: don't swallow KeyboardInterrupt/SystemExit
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))
    return link
def downloadApk(self, apkInfo, isBeta=False):
    """
    downloadApk(apkInfo): resolve the real download URL for apkInfo via
    getUrlFromRedirect, then download it to
    "<name>_<ver>-<vercode>_minAPI<sdk>.apk".

    Returns the file name on success ('beta:'-prefixed when isBeta is
    True), or None when the redirect cannot be resolved, the file already
    exists, or the name is invalid.
    """
    apkname = '{0}_{1}-{2}_minAPI{3}.apk'.format(apkInfo.name, apkInfo.ver, apkInfo.vercode, apkInfo.sdk)
    url = self.getUrlFromRedirect(apkname, apkInfo.download_src)
    if url == '':
        logging.error('Unable to determine redirect url for ' + apkname)
        return
    logging.info('Downloading "{0}" from: {1}'.format(apkname, url))
    try:
        # Skip the download when the APK already exists in any of the
        # locations we know about (cwd, ./apkcrawler/, ../apkcrawler/).
        for place, note in ((apkname, ''),
                            (os.path.join('.', 'apkcrawler', apkname), ' (in ./apkcrawler/)'),
                            (os.path.join('..', 'apkcrawler', apkname), ' (in ../apkcrawler/)')):
            if os.path.exists(place):
                logging.info('Downloaded APK already exists{0}.'.format(note))
                return

        # Open the url
        session = requests.Session()
        session.proxies = Debug.getProxy()
        r = session.get(url)
        with open(apkname, 'wb') as local_file:
            local_file.write(r.content)

        logging.debug(('beta:' if isBeta else 'reg :') + apkname)
        return (('beta:' if isBeta else '') + apkname)
    except OSError:
        logging.exception('!!! Filename is not valid: "{0}"'.format(apkname))
def checkOneApp(apkid):
    """
    checkOneApp(apkid): scrape the uptodown.com download page for apkid
    and download the latest APK it links to.
    """
    dAllApks = Global.dAllApks            # NOTE(review): unused in this function
    maxVerEachApk = Global.maxVerEachApk  # NOTE(review): unused in this function
    minSdkEachApk = Global.minSdkEachApk  # NOTE(review): unused in this function
    logging.info('Checking app: {0}'.format(apkid))
    try:
        upToDownName = allUpToDownNames[apkid]  # KeyError => app not in the site's name map
        html_name = '{0}.html'.format(upToDownName)
        url = 'http://' + upToDownName + '.en.uptodown.com/android/download'
        # Debug cache: presumably returns '' when no saved copy exists — then we fetch live.
        html = Debug.readFromFile(html_name)
        if html == '':
            session = requests.Session()
            session.proxies = Debug.getProxy()
            logging.debug('Requesting: ' + url)
            resp = session.get(url)
            html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
            Debug.writeToFile(html_name, html, resp.encoding)
        try:
            dom = BeautifulSoup(html, 'html5lib')
            latestapk = dom.findAll('iframe', {'id': 'iframe_download'})[0]['src']  # note that this url will still result in a redirect 302
            # We still miss versioncode comparison here
            downloadApk(latestapk, apkid)
            # We still miss fetching older versions
        except IndexError:
            logging.info('{0} not supported by uptodown.com ...'.format(apkid))
        except:
            logging.exception('!!! Error parsing html from: "{0}"'.format(url))
    except KeyError:
        logging.info('{0} not in uptodown.com dictionary...'.format(apkid))
def getUrlFromRedirect(apkname, url):
    """
    getUrlFromRedirect(apkname, url): fetch the redirect page at url
    (using a per-apkname debug HTML cache) and return the real download
    link found in it, or '' on failure.
    """
    html_name = '{0}_redirect.html'.format(apkname)
    html = Debug.readFromFile(html_name)
    link = ''
    if html == '':
        session = requests.Session()
        session.proxies = Debug.getProxy()
        logging.debug('Requesting2: ' + url)
        resp = session.get(url)
        # Normalize to plain ASCII bytes before parsing.
        html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
        Debug.writeToFile(html_name, html, resp.encoding)
    try:
        dom = BeautifulSoup(html, 'html5lib')
        # The download link is the parent <a> of the cloud-download glyph icon.
        link = dom.findAll('span', {'class': 'glyphicon glyphicon-cloud-download'})[0].parent['href']
    except Exception:  # was a bare except: don't swallow KeyboardInterrupt/SystemExit
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))
    return link
def downloadApk(self, avi, isBeta=False):
    """
    downloadApk(avi): Download avi.download_src to an APK file named
    "<name>_<realver>-<vercode>_minAPI<sdk>.apk" (a '.beta' suffix is
    stripped from the name, spaces in the version become underscores).

    Returns the file name on success ('beta:'-prefixed when isBeta is
    True), or None when the file already exists or the name is invalid.
    """
    apkname = '{0}_{1}-{2}_minAPI{3}.apk'.format(avi.name.replace('.beta', ''),
                                                 avi.realver.replace(' ', '_'),
                                                 avi.vercode,
                                                 avi.sdk)
    logging.info('Downloading "{0}" from: {1}'.format(apkname, avi.download_src))
    try:
        # Skip the download when the APK already exists in any of the
        # locations we know about (cwd, ./apkcrawler/, ../apkcrawler/).
        for place, note in ((apkname, ''),
                            (os.path.join('.', 'apkcrawler', apkname), ' (in ./apkcrawler/)'),
                            (os.path.join('..', 'apkcrawler', apkname), ' (in ../apkcrawler/)')):
            if os.path.exists(place):
                logging.info('Downloaded APK already exists{0}.'.format(note))
                return

        # Open the url
        session = requests.Session()
        session.proxies = Debug.getProxy()
        r = session.get(avi.download_src)
        with open(apkname, 'wb') as local_file:
            local_file.write(r.content)

        logging.debug(('beta:' if isBeta else 'reg :') + apkname)
        return (('beta:' if isBeta else '') + apkname)
    except OSError:
        logging.exception('!!! Filename is not valid: "{0}"'.format(apkname))
def checkOneApp(apkid):
    """
    checkOneApp(apkid): scrape the apk-dl.com page for apkid, and for
    every listed version that is at least the configured max version,
    meets the minimum SDK, and is not already tracked, download it.
    """
    dAllApks = Global.dAllApks            # already-known APKs, keyed by package id
    maxVerEachApk = Global.maxVerEachApk  # highest version string we care about per package
    minSdkEachApk = Global.minSdkEachApk  # minimum Android API level per package
    logging.info('Checking app: {0}'.format(apkid))
    html_name = '{0}.html'.format(apkid)
    url = 'http://apk-dl.com/' + apkid
    # Debug cache: presumably returns '' when no saved copy exists — then we fetch live.
    html = Debug.readFromFile(html_name)
    if html == '':
        session = requests.Session()
        session.proxies = Debug.getProxy()
        logging.debug('Requesting: ' + url)
        resp = session.get(url)
        html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
        Debug.writeToFile(html_name, html, resp.encoding)
    try:
        dom = BeautifulSoup(html, 'html5lib')
        apklist = dom.findAll('ul', {'class': 'apks dlist'})[0]  # IndexError here => site does not list this app
        apks = apklist.findAll('div', {'class': 'details'})
        maxApkInfo = ApkVersionInfo(name=apkid, ver=maxVerEachApk[apkid])
        for apk in apks:
            items = apk.findAll('div')
            dApk = {}
            # Each detail <div> reads like "Key:Value"; collect them into dApk.
            for item in items:
                itext = '{0}'.format(item.get_text().encode('ascii', 'ignore'))
                itext = re.sub('\s', '', itext)  # drop ALL whitespace so the split below is stable
                itextsp = itext.split(':', 1)
                if len(itextsp) == 2:
                    dApk[str(itextsp[0])] = str(itextsp[1])
            dApk['url'] = 'http:' + apk.find('a', {'class': 'btn btn-success'})['href']  # hrefs are protocol-relative
            Debug.printDictionary(dApk)
            if 'Version' in dApk and 'RequiresAndroid' in dApk:
                # 'RequiresAndroid' ends in "API:<n>)" -> extract the sdk level
                (trash, sdk) = dApk['RequiresAndroid'].split('API:', 1)
                sdk = sdk[0:-1]  # drop the trailing ')'
                # 'Version' looks like "<ver>(Code:<vercode>)" -> split name and code
                (ver, vercode) = dApk['Version'].split('(Code:', 1)
                ver = ver.split('(', 1)[0]
                vercode = vercode[0:-1]  # drop the trailing ')'
                tmpApkInfo = ApkVersionInfo(name=apkid, sdk=sdk, ver=ver, vercode=vercode)
                tmpApkInfo.download_url = dApk['url']
                # Relies on ApkVersionInfo's ordering: only consider versions >= the configured max.
                if maxApkInfo <= tmpApkInfo:
                    thisSdk = int(tmpApkInfo.sdk)
                    if thisSdk < minSdkEachApk[apkid]:
                        logging.debug('SdkTooLow: {0}({1})'.format(apkid, thisSdk))
                        continue
                    # Python 2 filter() returns a list; empty => no APK with this vercode tracked yet.
                    # NOTE(review): the lambda's 'apk' shadows the outer loop variable.
                    if not filter(lambda apk: apk.vercode == tmpApkInfo.vercode, dAllApks[apkid]):
                        logging.debug(tmpApkInfo.fullString(maxVerEachApk[apkid]))
                        downloadApk(tmpApkInfo)
    except IndexError:
        logging.info('{0} not supported by apk-dl.com ...'.format(apkid))
    except:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))