def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): Scrape the uptodown.com pages of one app and download
                        every listed version that the report still needs

    Returns a list holding one self.downloadApk() result per version that was
    downloaded. The list is empty when apkid is not in allUpToDownNames.
    """
    logging.info('Checking app: {0}'.format(apkid))

    filenames = []

    # Only this lookup is expected to raise KeyError, so it is guarded on its own
    try:
        upToDownName = allUpToDownNames[apkid]
    except KeyError:
        logging.info('{0} not in uptodown.com dictionary'.format(apkid))
        return filenames

    def cleanVersion(contents):
        # sometimes there is no versionnumber, or they set a v in front of the versionName
        if not contents:
            return ''
        # Round-trip through ascii so that both page layouts yield a text
        # version (the single-version layout used to pass bytes along)
        return contents[0].lstrip('v').strip().encode('ascii').decode('utf-8')

    appurl = 'http://' + upToDownName + '.en.uptodown.com/android/old'

    session = requests.Session()
    logging.debug('Requesting1: ' + appurl)
    try:
        appresp = session.get(appurl)
        apphtml = unicodedata.normalize('NFKD', appresp.text).encode('ascii', 'ignore')
        appdom = BeautifulSoup(apphtml, 'html5lib')

        avis = []
        latestver = appdom.find('span', {'itemprop': 'softwareVersion'})
        if latestver:
            # sometimes there is only 1 version and no old versions, and you get
            # the latest-version page of the app instead of the overview of old versions
            avis.append(ApkVersionInfo(name=apkid,
                                       ver=cleanVersion(latestver.contents),
                                       scrape_src='http://' + upToDownName + '.en.uptodown.com/android/download',
                                       crawler_name=self.__class__.__name__))
        else:
            appversions = appdom.findAll('section', {'class': 'container'})
            # limit ourself to only the first 5 results; the chance that there
            # are updates beyond that point is smaller than the chance of
            # having errors in the versionname
            for apk in appversions[0:5]:
                apkurl = apk.find('a')['href']
                apkver = apk.find('span', {'class': 'app_card_version'}).contents
                avis.append(ApkVersionInfo(name=apkid,
                                           ver=cleanVersion(apkver),
                                           scrape_src='http:' + apkurl,
                                           crawler_name=self.__class__.__name__))
            # END: for appversions
        # END: if latestver

        for avi in avis:
            if not self.report.isThisApkNeeded(avi):
                continue
            logging.debug('Requesting2: ' + avi.scrape_src)
            try:
                downloadresp = session.get(avi.scrape_src)
                downloadhtml = unicodedata.normalize('NFKD', downloadresp.text).encode('ascii', 'ignore')
                downloaddom = BeautifulSoup(downloadhtml, 'html5lib')
                # note that this url will still result in a redirect 302
                avi.download_src = 'http:' + downloaddom.find('iframe', {'class': 'hidden'})['src']
                filenames.append(self.downloadApk(avi))
            except Exception:
                # A broken download page must not stop the remaining versions
                logging.exception('!!! Error parsing html from: "{0}"'.format(avi.scrape_src))
        # END: for avis
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(appurl))

    return filenames
def checkPlayStore(self, credentials, lang="en_US"):
    """
    checkPlayStore(credentials, lang): Log in to the Play Store with the given
                                       credentials and download the APKs that
                                       the report still needs

    First the Play Store app itself ('com.android.vending') is checked for an
    update, then the report's apk ids are queried in bulk for a fixed list of
    SDK levels.

    Returns a list holding one self.downloadApk() result per download, or
    None as soon as one bulk query fails.
    NOTE(review): the None return discards downloads already collected in
    this call; kept as-is because callers may rely on it.
    """
    filenames = []
    logging.debug('Logging in to Play Store with: ' + credentials.androidId)
    playstore = GooglePlayAPI(credentials.androidId, lang)
    if not playstore.login(authSubToken=credentials.authSubToken):
        logging.error('Play Store login failed for {0}'.format(credentials.androidId))
        return filenames

    logging.info('{0} searches Play in {1} seconds'.format(credentials.androidId, credentials.delay))
    time.sleep(credentials.delay)

    if 'com.android.vending' in self.report.getAllApkIds():
        for storeApk in self.report.dAllApks['com.android.vending']:
            logging.debug('{0} VendingAPK: vername={1}, vercode={2}'.format(credentials.androidId, storeApk.ver, storeApk.vercode))
            playvercode = playstore.playUpdate(storeApk.ver, str(storeApk.vercode))
            if playvercode:
                logging.debug('{0} Play Store update {1}'.format(credentials.androidId, playvercode))
                avi = ApkVersionInfo(name='com.android.vending',
                                     vercode=playvercode,
                                     download_src=playstore,
                                     crawler_name=self.__class__.__name__)
                filenames.append(self.downloadApk(avi,
                                                  credentials.delay + random.randint(0, credentials.delay),
                                                  agentvername=storeApk.ver,
                                                  agentvercode=str(storeApk.vercode)))
                logging.info('{0} pauses {1} seconds before continuing'.format(credentials.androidId, credentials.delay))
                time.sleep(credentials.delay)
    else:
        logging.debug('{0} vending apk not in report'.format(credentials.androidId))

    for sdk in [19, 21, 22, 23, 24]:
        apkids = self.report.getAllApkIds(playstoreCaps=True)
        logging.debug('{0} - {1}'.format(sdk, apkids))
        res = playstore.bulkDetails(apkids, sdk)
        if not (res and res.status_code == http.client.OK and res.body):
            # res itself may be falsy/None here, so its status must not be
            # dereferenced unconditionally
            logging.error('{0} Error querying Play Store, status {1}: {2}'.format(playstore.androidId, sdk, getattr(res, 'status_code', None)))
            return None  # Not found, return empty

        for app in res.body.entry:
            if not (app.doc and app.doc.docid):
                logging.debug('{0} Empty search entry'.format(playstore.androidId))
                continue
            avi = ApkVersionInfo(name=app.doc.docid,
                                 vercode=app.doc.details.appDetails.versionCode,
                                 download_src=playstore,
                                 crawler_name=self.__class__.__name__)
            if self.report.isThisApkNeeded(avi):
                logging.debug('{0} Update {1}-{2} (Uploaddate {3})'.format(playstore.androidId, avi.name, avi.vercode, app.doc.details.appDetails.uploadDate))
                filenames.append(self.downloadApk(avi, credentials.delay + random.randint(0, credentials.delay)))
            else:
                logging.debug('{0} Skip {1}-{2} (Uploaddate {3})'.format(playstore.androidId, avi.name, avi.vercode, app.doc.details.appDetails.uploadDate))
    # END: for sdk

    return filenames
def getApkInfo(self, playstore, apkid, delay):
    """
    getApkInfo(playstore, apkid, delay): Get APK specific information from the
                                         Play Store and return it as an
                                         ApkVersionInfo object

    The details request is tried up to three times; only an HTTP 503 answer
    triggers a retry, after sleeping delay * <attempt number> seconds.

    Returns the ApkVersionInfo when the store reports a version code for
    apkid, otherwise None.
    """
    for attempt in range(1, 4):  # up to three tries
        res = playstore.details(apkid)
        if res.body:
            doc = res.body.docV2
            appDetails = doc.details.appDetails
            # if the versioncode does not exist; it is not offered as a valid
            # download for this device by the Play Store
            if appDetails.versionCode:
                avi = ApkVersionInfo(name=doc.docid,
                                     ver=appDetails.versionString.split(' ')[0],  # not sure if we need the split here
                                     vercode=appDetails.versionCode,
                                     download_src=playstore,
                                     crawler_name=self.__class__.__name__)
                # The placeholders used to repeat {0}, so the vercode was never logged
                logging.info('{0} found details {1} {2}-{3}'.format(playstore.androidId, avi.name, avi.ver, avi.vercode))
                return avi
            logging.info('{0} incompatible with {1}'.format(playstore.androidId, apkid))
        elif res.status_code == http.client.NOT_FOUND:
            logging.debug('{0} cannot find {1}'.format(playstore.androidId, apkid))
        elif res.status_code == http.client.SERVICE_UNAVAILABLE:
            wait = delay * attempt
            logging.info('{0} too many sequential requests for {1}, wait {2} seconds'.format(playstore.androidId, apkid, wait))
            time.sleep(wait)  # wait longer with each failed try
            continue
        else:
            logging.error('{0} unknown HTTP status for {1}: {2}'.format(playstore.androidId, apkid, res.status_code))
        return None  # Not found, return empty
    else:
        logging.error('{0} repetitive error 503 for {1}'.format(playstore.androidId, apkid))
    return None  # Kept receiving 503, return empty
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): Scrape the apk-dl.com page of one app and download
                        every listed version that the report still needs

    Returns a list holding one self.downloadApk() result per version that was
    downloaded.
    """
    logging.info('Checking app: {0}'.format(apkid))

    filenames = []

    url = 'http://apk-dl.com/' + apkid

    session = requests.Session()
    logging.debug('Requesting: ' + url)
    resp = session.get(url)
    html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')

    try:
        dom = BeautifulSoup(html, 'html5lib')
        # An app unknown to the site has no such list -> IndexError below
        apklist = dom.findAll('ul', {'class': 'apks dlist'})[0]
        apks = apklist.findAll('div', {'class': 'details'})

        for apk in apks:
            # Every detail <div> reads "Key: value"; collect them with all
            # whitespace removed
            dApk = {}
            for item in apk.findAll('div'):
                # Decode again after dropping non-ascii characters: formatting
                # the bytes object itself would embed its b'...' repr in the keys
                itext = item.get_text().encode('ascii', 'ignore').decode('ascii')
                itext = re.sub(r'\s', '', itext)
                itextsp = itext.split(':', 1)
                if len(itextsp) == 2:
                    dApk[str(itextsp[0])] = str(itextsp[1])

            apkurl = apk.find('a', {'class': 'btn btn-success'})
            if apkurl:
                dApk['url'] = 'http:' + apkurl['href']

            Debug.printDictionary(dApk)

            if 'url' not in dApk:
                # Without a download link this entry is useless; skip it
                # instead of aborting the remaining entries with a KeyError
                continue

            if 'Version' in dApk and 'RequiresAndroid' in dApk:
                (trash, sdk) = dApk['RequiresAndroid'].split('API:', 1)
                sdk = sdk[0:-1]  # drop the trailing character after the API level
                (ver, vercode) = dApk['Version'].split('(Code:', 1)
                ver = ver.split('(', 1)[0].strip()
                vercode = vercode[0:-1].strip()

                avi = ApkVersionInfo(name=apkid,
                                     sdk=sdk,
                                     ver=ver,
                                     vercode=vercode,
                                     download_src=dApk['url'],
                                     crawler_name=self.__class__.__name__)

                if self.report.isThisApkNeeded(avi):
                    filenames.append(self.downloadApk(avi))
    except IndexError:
        logging.info('{0} not supported by apk-dl.com ...'.format(apkid))
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))

    return filenames
def getOneVersionInfo(self, avi):
    """
    getOneVersionInfo(avi): Determines each versions information

    Fetches the apkmirror.com page avi.scrape_src points at and reads version
    name/code, SDK, target, DPI and the download link from it.

    Returns a new ApkVersionInfo, or None when the page could not be fetched
    or parsed (the error is logged).
    """
    baseurl = 'https://www.apkmirror.com'
    # Bound before the try block so that the error handler can always format
    # it, even when joining the scrape path itself fails
    url = baseurl
    try:
        url = baseurl + avi.scrape_src

        session = requests.Session()
        session.headers.update(self.headers)
        logging.debug('Requesting2: ' + url)

        resp = session.get(url)
        html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')

        dom = BeautifulSoup(html, 'html5lib')
        contentArea = dom.findAll('div', {'class': 'tab-content'})[0]
        dl_button = contentArea.findAll('a', {'class': 'downloadButton'})[0]
        appspecs = contentArea.findAll('div', {'class': 'appspec-row'})

        def specValue(row):
            # Text of the value cell belonging to one spec row
            return row.find('div', {'class': 'appspec-value'}).get_text()

        avivername = ''
        avivercode = ''
        avisdk = ''
        avitarget = ''
        avidpi = ''

        # Each row is identified by the icon it carries
        for appspec in appspecs:
            # Version
            if appspec.find('svg', {'class': 'apkm-icon-file'}):
                m = self.reVersion.search(specValue(appspec))
                if m:
                    avivername = m.group('VERNAME')
                    avivercode = m.group('VERCODE')
                    logging.debug('debug: "{}" - "{}"'.format(avivername, avivercode))
                else:
                    logging.debug('debug: "{}"'.format(specValue(appspec)))

            # SDK & Target
            if appspec.find('svg', {'class': 'apkm-icon-sdk'}):
                m = self.reSdk.search(specValue(appspec))
                if m:
                    avisdk = m.group('SDK')

                m = self.reTarget.search(specValue(appspec))
                if m:
                    avitarget = m.group('Target')

            # DPI
            if appspec.find('svg', {'class': 'apkm-icon-dpi'}):
                avidpi = specValue(appspec).replace(', ', '-')

        return ApkVersionInfo(name=avi.name,
                              ver=avivername,
                              vercode=avivercode,
                              sdk=avisdk,
                              target=avitarget,
                              dpi=avidpi,
                              arch=avi.arch,
                              scrape_src=avi.scrape_src,
                              download_src=baseurl + dl_button['href'],
                              crawler_name=self.__class__.__name__)
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))
    return None
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): Scrape the apkbeast.com page of one app and download
                        the version offered there when the report needs it

    Returns the result of self.downloadApk() when a download was started,
    otherwise None.
    """
    logging.info('Checking app: {0}'.format(apkid))

    url = 'http://apkbeast.com/' + apkid

    session = requests.Session()
    logging.debug('Requesting1: ' + url)
    resp = session.get(url)

    if resp.status_code != http.client.OK:
        logging.info('{0} not supported by APKBeast ...'.format(apkid))
        return None

    html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
    try:
        dom = BeautifulSoup(html, 'html5lib')
        versionTag = dom.find('p', {'itemprop': 'softwareVersion'})
        linkTag = dom.find('a', {'class': 'da'})
        apkurl = linkTag.get('href') if linkTag is not None else None

        # find() yields None for a missing element (it never raises
        # IndexError), so an unsupported app has to be detected explicitly
        if versionTag is None or not apkurl:
            logging.info('{0} not supported by APKBeast ...'.format(apkid))
            return None

        avi = ApkVersionInfo(name=apkid,
                             ver=versionTag.get_text().strip(),
                             crawler_name=self.__class__.__name__)
        if apkurl[0] == '/':  # a relative URL; takes us to an intermediate screen
            avi.scrape_src = 'http://apkbeast.com' + apkurl
        else:  # direct download
            avi.download_src = apkurl

        if self.report.isThisApkNeeded(avi):
            return self.downloadApk(avi)
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))
    return None
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): Scrape the apkpure.com version list of one app and
                        download every listed version the report still needs

    Returns a list holding one self.downloadApk() result per version that was
    downloaded.
    """
    logging.info('Checking app: {0}'.format(apkid))

    filenames = []

    url = 'https://apkpure.com/apkpure/' + apkid  # the /apkpure/ part just needs to be an arbitrary string

    session = requests.Session()
    logging.debug('Requesting1: ' + url)
    resp = session.get(url)

    if resp.status_code != http.client.OK:
        return filenames

    html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
    try:
        dom = BeautifulSoup(html, 'html5lib')
        apklist = dom.find('div', {'class': 'faq_cat'})
        if apklist is None:
            # find() yields None (not IndexError) when the list is absent
            logging.info('{0} not supported by apkpure.com ...'.format(apkid))
            return filenames

        # Visible entries first, then the ones the page keeps collapsed
        apks = apklist.findAll('dd', {'style': ''}) + apklist.findAll('dd', {'style': 'display:none;'})

        for apk in apks:
            para = apk.find('p')
            link = apk.find('a', {'class': 'down'})
            if para is None or link is None or not link.get('href'):
                # An incomplete entry must not abort the remaining ones
                continue

            m = self.reVersion.search(para.get_text())
            if not m:
                continue

            sdk = m.group('SDK')
            avi = ApkVersionInfo(name=apkid,
                                 sdk=(sdk if sdk else 0),
                                 ver=m.group('VERNAME'),
                                 vercode=m.group('VERCODE'),
                                 scrape_src='https://apkpure.com' + link['href'],
                                 crawler_name=self.__class__.__name__)

            if self.report.isThisApkNeeded(avi):
                avi = self.parseRedirectPage(avi)
                filenames.append(self.downloadApk(avi))
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))

    return filenames
def showMissingApks(dAllApks, maxVerEachApk):
    """
    showMissingApks(dAllApks, maxVerEachApk): Log, in sorted order, the
                                              fullString() of every APK whose
                                              ver compares lower than the
                                              maximum version of its app
    """
    outdated = []

    for apkid in dAllApks.keys():
        pending = []
        for apk in dAllApks[apkid]:
            newest = ApkVersionInfo(ver=maxVerEachApk[apkid])
            if '2280749' in newest.ver:  # This excludes 'from factory image' apks
                newest.ver = '0'
                pending = []

            if apk.ver < newest.ver:
                logging.debug('{0}: {1} < maxApk.ver: {2}'.format(apkid, apk.ver, newest.ver))
                pending.append(apk.fullString(maxVerEachApk[apkid]))

        if pending:
            outdated += pending
    # END: for apkid in

    for entry in sorted(outdated):
        logging.info(entry)
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): Fetch the apkbeast.com page of one app and, when the
                        report still needs the version shown there, download it

    Returns whatever self.downloadApk() returns for a started download and
    None in every other case.
    """
    logging.info('Checking app: {0}'.format(apkid))

    url = 'http://apkbeast.com/' + apkid

    session = requests.Session()
    logging.debug('Requesting1: ' + url)
    resp = session.get(url)

    if resp.status_code == http.client.OK:
        html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
        try:
            dom = BeautifulSoup(html, 'html5lib')
            versionNode = dom.find('p', {'itemprop': 'softwareVersion'})
            downloadNode = dom.find('a', {'class': 'da'})

            # A missing element comes back as None from find(); report that
            # as "not supported" rather than as a parse error, and name the
            # right site in the message
            if versionNode is None or downloadNode is None or not downloadNode.get('href'):
                logging.info('{0} not supported by APKBeast ...'.format(apkid))
                return None

            apkurl = downloadNode['href']
            apkversion = versionNode.get_text().strip()
            avi = ApkVersionInfo(name=apkid,
                                 ver=apkversion,
                                 crawler_name=self.__class__.__name__)
            if apkurl[0] == '/':  # a relative URL; takes us to an intermediate screen
                avi.scrape_src = 'http://apkbeast.com' + apkurl
            else:  # direct download
                avi.download_src = apkurl

            if self.report.isThisApkNeeded(avi):
                return self.downloadApk(avi)
        except Exception:
            logging.exception('!!! Error parsing html from: "{0}"'.format(url))
    else:
        logging.info('{0} not supported by APKBeast ...'.format(apkid))
    return None
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): Look up the latest version of one app on plazza.ir and
                        download it when the report still needs it

    Returns the result of self.downloadApk() when a download was started,
    otherwise None.
    """
    logging.info('Checking app: {0}'.format(apkid))

    url = 'http://www.plazza.ir/app/' + apkid + '?hl=en'

    session = requests.Session()
    logging.debug('Requesting: ' + url)
    try:
        resp = session.get(url, allow_redirects=False)  # we get a 302 if application is not found
        if resp.status_code == http.client.OK:
            html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')

            try:
                dom = BeautifulSoup(html, 'html5lib')
                latesthref = dom.find('a', {'itemprop': 'downloadUrl'})['href']
                latestver = dom.find('div', {'itemprop': 'softwareVersion'}).contents[0].strip()
                # Follow the redirects with a HEAD request to learn the final file URL
                latesturl = session.head('http://www.plazza.ir' + latesthref, allow_redirects=True).url
                # latestvercode = re.search('(_)([0-9]+)(\.apk)$', latesturl).group(2)  # apparently this is NOT a (reliable?) versioncode
                avi = ApkVersionInfo(name=apkid,
                                     ver=latestver,
                                     # vercode=latestvercode,
                                     download_src=latesturl,
                                     crawler_name=self.__class__.__name__)

                if self.report.isThisApkNeeded(avi):
                    return self.downloadApk(avi)

                # Fetching of older versions is not completed, because it requires VIP accounts.
                # The app id lookup is only needed for that and is therefore disabled as
                # well; evaluating it could fail and block the download above for nothing.
                # appid = re.search('(^\/dl\/)([0-9]+)(\/1$)', latesthref).group(2)
                # olderapks = dom.find('div', {'style': 'direction: rtl'}).findAll('a', {'target': '_blank'})
                # for apk in olderapks:
                #     apkver = re.search('(\/)([0-9]+)(\?.*$|$)', apk['href']).group(2)  # number is either end of string or there can be an ? for extra GET parameters
                #     apkurl = session.head('http://www.plazza.ir/dl_version/' + appid + '/' + apkver + '/1', allow_redirects=True).url
            except Exception:
                logging.exception('!!! Error parsing html from: "{0}"'.format(url))
        else:
            logging.info('{0} not available on plazza.ir'.format(apkid))
    except Exception:
        logging.exception('Connection error to plazza.ir when checking {0} at {1}'.format(apkid, url))
    return None
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): Query the mobogenie detail API for one app and
                        download the version it reports when the report still
                        needs it

    The JSON answer is taken from Debug.readFromFile() when that returns
    something, otherwise it is requested and handed to Debug.writeToFile().

    Returns the result of self.downloadApk() when a download was started,
    otherwise None.
    """
    logging.info('Checking app: {0}'.format(apkid))

    file_name = '{0}.json'.format(apkid)
    url = 'http://helper.mgccw.com/nclient/sjson/detail/detailInfo.htm?apkId=' + apkid
    data = Debug.readFromFile(file_name)

    try:
        if data == '':
            session = requests.Session()
            logging.debug('Requesting: ' + url)
            resp = session.get(url, allow_redirects=False)
            # A redirect is treated like an undecodable answer: app unknown
            if resp.status_code == http.client.FOUND:
                raise ValueError
            data = json.loads(resp.text)
            Debug.writeToFile(file_name,
                              json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')),
                              resp.encoding)
        elif isinstance(data, str):
            # presumably the cached file comes back as raw JSON text; decode it
            # so that the lookups below work - TODO confirm against Debug
            data = json.loads(data)

        item = data['data']['appInfo']
        avi = ApkVersionInfo(name=item['apkId'],
                             sdk=item['sdkVersion'],
                             ver=item['version'].split(' ')[0],
                             vercode=item['versionCode'],
                             download_src='http://download.mgccw.com/' + item['apkPath'],
                             crawler_name=self.__class__.__name__)

        if self.report.isThisApkNeeded(avi):
            return self.downloadApk(avi)
    except ValueError:
        # Raised above for a redirect and by json.loads() for invalid JSON
        logging.info('{0} not supported by mobogenie ...'.format(apkid))
    except Exception:
        logging.exception('!!! Invalid JSON from: "{0}"'.format(url))
    return None
def processReportSourcesOutput(self, lines):
    """
    processReportSourcesOutput(lines): Populate a dictionary of all APKs and
                                       versions in report created by
                                       report_sources.sh

    Every line that starts with whitespace followed by eight '|' separated
    columns (the first one beginning with a lowercase letter) becomes an
    ApkVersionInfo. The result is stored in self.dAllApks as a dictionary
    mapping each name to the list of its ApkVersionInfo objects; lines that
    do not match are ignored.
    """
    self.dAllApks = {}

    # Raw strings keep the regex escapes (\s, \|) away from the Python parser
    sColumns = [r'(?P<name>[a-z][^|]*)', r'(?P<arch>[^|]*)', r'(?P<sdk>[^|]*)', r'(?P<dpi>[^|]*)',
                r'(?P<ver>[^|]*)', r'(?P<code>[^|]*)', r'(?P<mib>[^|]*)', r'(?P<sig>[^|]*)']
    pattern = r'^\s+' + r'\|'.join(sColumns) + r'$'
    reLine = re.compile(pattern)

    for line in lines:
        m = reLine.match(line)
        if not m:
            continue

        avi = ApkVersionInfo(name=m.group('name').strip(),
                             arch=m.group('arch').strip(),
                             sdk=m.group('sdk').strip(),
                             dpi=m.group('dpi').strip(),
                             ver=m.group('ver').strip(),
                             vercode=m.group('code').strip())

        # Check if supported and add if it is
        if avi.vercode in [1, 19, 21, 22, 23, 24, 25, 26, 27, 28]:  # Ignore factory image files
            continue

        self.dAllApks.setdefault(avi.name, []).append(avi)
def showMissingApks(self):
    """
    showMissingApks(): Rebuild self.appsNeeded with the fullString() of every
                       APK in self.dAllApks whose ver compares lower than the
                       maximum version of its app, then log the sorted entries
    """
    self.appsNeeded = []

    # NOTE: This code currently only shows older apks (that need updating).
    #       @mfonville has another scheme based up vercode rules for each
    #       apkid that would be more complete
    for apkid in list(self.dAllApks):
        outdated = []
        for apk in self.dAllApks[apkid]:
            newest = ApkVersionInfo(ver=self.maxVerEachApk[apkid])
            if not apk.ver < newest.ver:
                continue
            logging.debug('{0}: {1} < maxApk.ver: {2}'.format(apkid, apk.ver, newest.ver))
            outdated.append(apk.fullString(self.maxVerEachApk[apkid]))

        if outdated:
            self.appsNeeded.extend(outdated)
    # END: for apkid in

    for entry in sorted(self.appsNeeded):
        logging.info(entry)
def isThisApkNeeded(self, avi):
    """
    isThisApkNeeded(avi): Return True when the APK described by avi passes
                          every check against the report data held by this
                          object, False as soon as one check rejects it

    The checks run in this order: app known to the report, vercode (or
    absolute realver) not present yet, version not below the known maximum,
    SDK and target numeric, SDK not below the minimum, and finally the extra
    checks for apps with beta support.
    """
    apkid = avi.lowername

    # Against the list we are looking for
    if apkid not in self.dAllApks:
        return False

    logging.debug(avi.fullString(avi.ver))
    logging.debug('Do we have already vercode?')
    known = self.dAllApks[apkid]

    # Do we have the requested vercode already? Or do we have a higher vercode
    # while there is only one variant of these apps?
    if avi.vercode != 0:
        sameCode = [apk for apk in known if apk.vercode == avi.vercode]
        if sameCode or (avi.isVercodeAbsolute() and
                        [apk for apk in known if apk.vercode >= avi.vercode]):
            logging.debug(' DON\'T NEED')
            return False
    else:
        # We only need to run the realvername match if we could not compare the vercode itself
        logging.debug('Can we use an absolute realvername match?')
        if avi.isRealverAbsolute():
            logging.debug('Do we have already a matching absolute realvername?')
            # Do we have the requested realver already?
            if avi.realver != '' and [apk for apk in known if apk.realver == avi.realver]:
                logging.debug(' DON\'T NEED')
                return False

    logging.debug('Is it less than maxVersion?')
    # Is it < maxVersion?
    if avi.ver != '':
        maxApkInfo = ApkVersionInfo(name=apkid, ver=self.maxVerEachApk[apkid])
        if avi < maxApkInfo:
            logging.debug(' DON\'T NEED')
            return False

    # If it is not a number, but a letter it is a preview and undesired by Open GApps
    logging.debug('Is SDK a number?')
    if avi.sdk and not isinstance(avi.sdk, int):
        logging.debug('SdkNotNumber: {0}({1})'.format(avi.name, avi.sdk))
        return False

    # If it is not a number, but a letter it is a preview and undesired by Open GApps
    logging.debug('Is Target a number?')
    if avi.target and not isinstance(avi.target, int):
        logging.debug('TargetNotNumber: {0}({1})'.format(avi.name, avi.target))
        return False

    logging.debug('Is it less than minSdk?')
    # Is it < minSdk?
    if avi.sdk != 0 and avi.sdk < self.minSdkEachApk[apkid]:
        logging.debug('SdkTooLow: {0}({1})'.format(avi.name, avi.sdk))
        return False

    # Are we dealing with a app that has beta support?
    #   Examples: WebView, GoogleApp
    if self.needsBetaSupport(avi):
        logging.debug('beta support ...')
        # TODO: Needs more thought (?)
        if not apkid.endswith('.beta'):  # Make sure we don't promote a beta app to non-beta
            betaid = apkid + '.beta'

            logging.debug('Do we have already vercode? (beta)')
            # Do we have the requested vercode (in beta) already?
            if avi.vercode != 0 and [apk for apk in self.dAllApks[betaid] if apk.vercode == avi.vercode]:
                logging.debug(' DON\'T NEED')
                return False

            logging.debug('Is it greater than or equal to maxVersion?')
            # Is it >= maxVersion (for beta)?
            if avi.ver != '':
                maxApkInfo = ApkVersionInfo(name=apkid, ver=self.maxVerEachApk[betaid])
                if avi >= maxApkInfo:
                    logging.debug(' DON\'T NEED')
                    return False

            logging.debug('++++ NEED IT ... (beta)')
    # END: if self.needsBetaSupport(avi):

    logging.debug('++++ NEED IT ...')
    return True
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): Collect all versions for an application

    Searches the apkmirror.com uploads page for the app, inspects only the
    first (newest) result and downloads each of its variants that the report
    still needs.

    Returns a list holding one self.downloadApk() result per download; the
    list is empty when apkid is not in allApkMirrorNames.
    """
    logging.info('Checking app: {0}'.format(apkid))

    filenames = []

    # Only this lookup is expected to raise KeyError, so it is guarded on its own
    try:
        apkMirrorName = allApkMirrorNames[apkid]
    except KeyError:
        logging.info('{0} not in apkmirror.com dictionary'.format(apkid))
        return filenames

    # Using the "uploads/?q=" page sorts newest first but is slower
    # Using the "apk/google-inc/" page is faster loading
    # For now favor slow load and skip checking all versions (below)
    url = 'http://www.apkmirror.com/uploads/?q={0}'.format(apkMirrorName)

    session = requests.Session()
    logging.debug('Requesting1: ' + url)

    try:
        resp = session.get(url)
        html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')

        dom = BeautifulSoup(html, 'html5lib')
        versions = dom.findAll('div', {'class': 'infoSlide'})
        if not versions:
            # An empty result list is not a parse error; say so without a traceback
            logging.info('{0} has no uploads listed on apkmirror.com'.format(apkid))
            return filenames

        hasVariants = dom.find('svg', {'class': 'tag-icon'})

        avis = []

        # Skip all version since it is sorted (above)
        # for version in versions:
        version = versions[0]
        verName = version.findAll('span', {'class': 'infoslide-value'})[0].get_text()

        appNameRow = version.find_previous_sibling('div', {'class': 'appRow'})
        appNameLink = appNameRow.find('a', {'class': 'fontBlack'})
        appName = appNameLink.get_text()
        appUrl = appNameLink['href']

        if 'preview' in appName.lower():
            logging.info('!!! Preview Found: ' + appName)
        else:
            isBeta = 'beta' in appName.lower()

            avi = ApkVersionInfo(name=apkid + ('.beta' if isBeta else ''),
                                 ver=verName,
                                 scrape_src=appUrl)

            if self.report.isThisApkNeeded(avi):
                if hasVariants:
                    avis.extend(self.getMultipleVersionInfo(avi))
                else:
                    tmp = self.getOneVersionInfo(avi)
                    if tmp:
                        avis.append(tmp)
        # END: for version in versions:

        # Determine which versions to download
        for avi in avis:
            if self.report.isThisApkNeeded(avi):
                logging.info('Downloading: "{0}"'.format(avi.getFilename()))
                filenames.append(self.downloadApk(avi, avi.name.endswith('.beta')))
            else:
                logging.debug('Skipping: "{0}" ({1})'.format(avi.name, avi.scrape_src))
        # END: for avi in avis:
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))

    return filenames
def checkOneApp(self, apkid):
    """
    checkOneApp(apkid): Collect all versions for an application

    Searches the apkmirror.com uploads page for the app (sending
    self.headers with the request), inspects only the first (newest) result
    and downloads each of its variants that the report still needs.

    Returns a list holding one self.downloadApk() result per download; the
    list is empty when apkid is not in allApkMirrorNames.
    """
    logging.info('Checking app: {0}'.format(apkid))

    filenames = []

    # Only this lookup is expected to raise KeyError, so it is guarded on its own
    try:
        apkMirrorName = allApkMirrorNames[apkid]
    except KeyError:
        logging.info('{0} not in apkmirror.com dictionary'.format(apkid))
        return filenames

    # Using the "uploads/?q=" page sorts newest first but is slower
    # Using the "apk/google-inc/" page is faster loading
    # For now favor slow load and skip checking all versions (below)
    url = 'http://www.apkmirror.com/uploads/?q={0}'.format(apkMirrorName)

    session = requests.Session()
    session.headers.update(self.headers)
    logging.debug('Requesting1: ' + url)

    try:
        resp = session.get(url)
        html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')

        dom = BeautifulSoup(html, 'html5lib')
        versions = dom.findAll('div', {'class': 'infoSlide'})
        if not versions:
            # An empty result list is not a parse error; say so without a traceback
            logging.info('{0} has no uploads listed on apkmirror.com'.format(apkid))
            return filenames

        hasVariants = dom.find('svg', {'class': 'tag-icon'})

        avis = []

        # Skip all version since it is sorted (above)
        # for version in versions:
        version = versions[0]
        verName = version.findAll('span', {'class': 'infoslide-value'})[0].get_text()

        appNameRow = version.find_previous_sibling('div', {'class': 'appRow'})
        appNameLink = appNameRow.find('a', {'class': 'fontBlack'})
        appName = appNameLink.get_text()
        appUrl = appNameLink['href']

        if 'preview' in appName.lower():
            logging.info('!!! Preview Found: ' + appName)
        else:
            isBeta = 'beta' in appName.lower()

            avi = ApkVersionInfo(name=apkid + ('.beta' if isBeta else ''),
                                 ver=verName,
                                 scrape_src=appUrl)

            if self.report.isThisApkNeeded(avi):
                if hasVariants:
                    avis.extend(self.getMultipleVersionInfo(avi))
                else:
                    tmp = self.getOneVersionInfo(avi)
                    if tmp:
                        avis.append(tmp)
        # END: for version in versions:

        # Determine which versions to download
        for avi in avis:
            if self.report.isThisApkNeeded(avi):
                logging.info('Downloading: "{0}"'.format(avi.getFilename()))
                filenames.append(self.downloadApk(avi, avi.name.endswith('.beta')))
            else:
                logging.debug('Skipping: "{0}" ({1})'.format(avi.name, avi.scrape_src))
        # END: for avi in avis:
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))

    return filenames
def checkOneId(self, aptoideId):
    """
    checkOneId(aptoideId): Get APK specific information from the Aptoide API
                           and return it as an ApkVersionInfo object if it is
                           tracked by OpenGApps

    The API is queried up to three times, sleeping GlobalDelay * <attempt
    number> seconds before each attempt. Nothing is queried when
    Debug.readFromFile() already returned data for this id.

    Returns a dictionary with the keys 'id', 'status' ('fail', 'empty' or
    'good'), 'time' and 'filename'.
    """
    file_name = '{0}.json'.format(aptoideId)
    url = 'http://ws75.aptoide.com/api/7/app/getMeta/app_id={0}'.format(aptoideId)
    data = Debug.readFromFile(file_name)

    run = {}
    run['id'] = aptoideId
    run['status'] = 'fail'  # 'fail', 'empty', 'good'
    run['time'] = ''
    run['filename'] = ''

    if data == '':
        session = requests.Session()

        for x in range(1, 4):  # up to three tries
            wait = GlobalDelay * x
            # logging.info('Waiting {0} seconds before fetching {1}'.format(wait, file_name))
            time.sleep(wait)
            try:
                logging.debug('Checking ({0}): {1}'.format(x, url))
                resp = session.get(url)

                if resp.status_code == http.client.OK:
                    # Append ID on good http response
                    run['status'] = 'empty'
                    data = resp.json()
                    if 'info' in data and 'status' in data['info'] and data['info']['status'] == 'OK':
                        # Found an APK update the Max. ID
                        run['status'] = 'good'
                        run['time'] = data['data']['modified']

                        fileInfo = data['data']['file']
                        hardware = fileInfo['hardware']

                        theArchs = hardware.get('cpus', [])
                        _arch = ','.join(theArchs) if len(theArchs) > 0 else 'all'

                        avi = ApkVersionInfo(name=data['data']['package'],
                                             arch=_arch,
                                             sdk=hardware['sdk'],
                                             dpi=self.doDpiStuff(hardware.get('densities', [])),
                                             ver=fileInfo['vername'].split(' ')[0],  # Look at only the true version number
                                             vercode=fileInfo['vercode'],
                                             download_src=fileInfo['path'],
                                             malware=fileInfo.get('malware', ''),  # We only have this key if vercode is in options
                                             crawler_name=self.__class__.__name__)

                        Debug.writeToFile(file_name,
                                          json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')),
                                          resp.encoding)

                        # Log AptoideID, Date, ApkID
                        self.logIdAndDate(data['data'])

                        # NOTE(review): an unused ".beta" copy of avi used to be
                        # built and checked here; its result was never read, so
                        # beta variants get no special treatment in this method

                        # Do we already have it
                        if self.report.isThisApkNeeded(avi):
                            # The malware info is absent ('') for some answers;
                            # indexing it blindly raised a TypeError that was
                            # then mistaken for invalid JSON and retried
                            malware = avi.malware if isinstance(avi.malware, dict) else {}
                            sigcheck = (malware.get('reason') or {}).get('signature_validated') or {}
                            if (malware.get('rank') == "warn" and
                                    sigcheck.get('status') == "failed" and
                                    sigcheck.get('signature_from') == "market"):
                                # signature matches market, but it does not pass verification
                                logging.error('{0} is a corrupt or incomplete APK, ignored.'.format(avi.download_src))
                            else:
                                # Are we sure we still need it after the additional info?
                                if self.report.isThisApkNeeded(avi):
                                    run['filename'] = self.downloadApk(avi)
                            # END: if avi.malware

                        if avi.name == 'org.opengapps.app':
                            run['filename'] = '{0}-{1}_aptoideId-{2}.stub.apk'.format(avi.name, avi.vercode, aptoideId)
                    # An OK response ends the attempts whether or not it held an APK
                    return run
                elif resp.status_code in [http.client.UNAUTHORIZED,  # 401
                                          http.client.FORBIDDEN,     # 403
                                          http.client.NOT_FOUND,     # 404
                                          http.client.GONE]:         # 410
                    run['status'] = 'empty'
                    return run
                # Any other status falls through to the next attempt
            except Exception:
                logging.exception('!!! Invalid JSON from: "{0}", retry in: {1}s'.format(url, wait))
        # END: for x
    # END: if data
    return run
def checkOneApp(apkid):
    """
    checkOneApp(apkid): Scrape the apk-dl.com page of one app and download
                        every listed version that is at least the known
                        maximum version, meets the minimum SDK and whose
                        vercode is not in Global.dAllApks yet

    The page is taken from Debug.readFromFile() when that returns something,
    otherwise it is requested and handed to Debug.writeToFile().
    """
    dAllApks = Global.dAllApks
    maxVerEachApk = Global.maxVerEachApk
    minSdkEachApk = Global.minSdkEachApk

    logging.info('Checking app: {0}'.format(apkid))

    html_name = '{0}.html'.format(apkid)
    url = 'http://apk-dl.com/' + apkid
    html = Debug.readFromFile(html_name)

    if html == '':
        session = requests.Session()
        session.proxies = Debug.getProxy()
        logging.debug('Requesting: ' + url)
        resp = session.get(url)
        html = unicodedata.normalize('NFKD', resp.text).encode('ascii', 'ignore')
        Debug.writeToFile(html_name, html, resp.encoding)

    try:
        dom = BeautifulSoup(html, 'html5lib')
        # An app unknown to the site has no such list -> IndexError below
        apklist = dom.findAll('ul', {'class': 'apks dlist'})[0]
        apks = apklist.findAll('div', {'class': 'details'})

        maxApkInfo = ApkVersionInfo(name=apkid, ver=maxVerEachApk[apkid])

        for apk in apks:
            # Every detail <div> reads "Key: value"; collect them with all
            # whitespace removed
            dApk = {}
            for item in apk.findAll('div'):
                # Decode again after dropping non-ascii characters: formatting
                # the bytes object itself would embed its b'...' repr in the keys
                itext = item.get_text().encode('ascii', 'ignore').decode('ascii')
                itext = re.sub(r'\s', '', itext)
                itextsp = itext.split(':', 1)
                if len(itextsp) == 2:
                    dApk[str(itextsp[0])] = str(itextsp[1])
            dApk['url'] = 'http:' + apk.find('a', {'class': 'btn btn-success'})['href']

            Debug.printDictionary(dApk)

            if 'Version' in dApk and 'RequiresAndroid' in dApk:
                (trash, sdk) = dApk['RequiresAndroid'].split('API:', 1)
                sdk = sdk[0:-1]  # drop the trailing character after the API level
                (ver, vercode) = dApk['Version'].split('(Code:', 1)
                ver = ver.split('(', 1)[0]
                vercode = vercode[0:-1]

                tmpApkInfo = ApkVersionInfo(name=apkid, sdk=sdk, ver=ver, vercode=vercode)
                tmpApkInfo.download_url = dApk['url']
                if maxApkInfo <= tmpApkInfo:
                    thisSdk = int(tmpApkInfo.sdk)
                    if thisSdk < minSdkEachApk[apkid]:
                        logging.debug('SdkTooLow: {0}({1})'.format(apkid, thisSdk))
                        continue

                    # any() instead of "not filter(...)": a filter object is
                    # always truthy on Python 3, which disabled every download
                    if not any(known.vercode == tmpApkInfo.vercode for known in dAllApks[apkid]):
                        logging.debug(tmpApkInfo.fullString(maxVerEachApk[apkid]))
                        downloadApk(tmpApkInfo)
    except IndexError:
        logging.info('{0} not supported by apk-dl.com ...'.format(apkid))
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))
def getAppVersions(apkInfo):
    """
    getAppVersions(apkInfo): Collect all versions for an application

    Reads the version links from the second 'latestWidget' block of the app's
    apkmirror page, appends one ApkVersionInfo per recognised version to
    apkInfo.versions and then fetches and downloads every entry whose name
    equals that of the highest sorted version. Beta and preview entries are
    only logged.

    The page is taken from Debug.readFromFile() when that returns something,
    otherwise it is requested and handed to Debug.writeToFile().
    """
    logging.info('Fetching Information for: {0}'.format(apkInfo.apkmirror_name))

    html_name = '{0}.html'.format(apkInfo.opengapps_name)
    url = APKMIRRORBASEURL + APKMIRRORGOOGLEURL2 + apkInfo.url
    html = Debug.readFromFile(html_name)

    if html == '':
        session = requests.Session()
        logging.debug('Requesting2: ' + url)
        resp = session.get(url)
        html = resp.text
        Debug.writeToFile(html_name, html, resp.encoding)

    try:
        dom = BeautifulSoup(html, 'html5lib')
        latest = dom.findAll('div', {'class': 'latestWidget'})[1]
        versions = latest.findAll('a', {'class': 'fontBlack'})

        dVersions = {}

        for version in versions:
            # Ignore duplicate entries (one 'hidden' is shown,
            # and the other 'visible', is not shown)
            # A parent without any class attribute is simply not a duplicate
            if 'visible-xs-block' in version.parent.get('class', []):
                continue

            # Decode again after dropping non-ascii characters so that the
            # text (and not the b'...' repr of a bytes object) gets quoted
            verText = '"{0}"'.format(version.get_text().encode('ascii', 'ignore').decode('ascii'))
            if 'beta' in verText.lower() or 'preview' in verText.lower():
                logging.info('!!! Beta or Preview Found: ' + verText)
                continue

            dVersions[verText] = version['href']

            m = apkInfo.reVersion.search(verText)
            if m:
                avi = ApkVersionInfo(name=m.group('VERSIONNAME').rstrip('-.'),
                                     scrape_url=version['href'])
                # The version is what remains of the name once the app name
                # is removed, up to the first space
                avi.ver = avi.name
                avi.ver = avi.ver.replace(apkInfo.opengapps_name, '').strip()
                avi.ver = avi.ver.split(' ')[0]
                apkInfo.versions.append(avi)
            else:
                logging.info('!!! No Matchy: ' + verText)
        # END: for v in versions:

        Debug.printDictionary(dVersions)

        # Determine which versions to download
        if len(apkInfo.versions) > 0:
            maxVersionByName = sorted(apkInfo.versions)[-1]

            logging.debug('Max Version By Name: "{0}"'.format(maxVersionByName.name))

            for v in apkInfo.versions:
                if v.name == maxVersionByName.name:
                    logging.info('Getting Info for: "{0}" ({1})'.format(v.name, v.scrape_url))
                    getVersionInfo(v)
                    logging.info('Downloading: "{0}"'.format(v.apk_name))
                    downloadApkFromVersionInfo(v)
                else:
                    logging.debug('Skipping: "{0}" ({1})'.format(v.name, v.scrape_url))
            # END: for v in apkInfo.versions:
        else:
            logging.info('No matching APKs found for: {0}'.format(apkInfo.apkmirror_name))
    except Exception:
        logging.exception('!!! Error parsing html from: "{0}"'.format(url))

    logging.debug('-'*80)