def recursiveHealthList(startID, list, markedList, output_path=None):
    """Crawl "similar app" links from ``startID`` and log free health apps.

    For every app reported by ``play_scraper.similar`` the details are
    fetched once; apps whose category contains ``HEALTH_AND_FITNESS`` and
    whose price is ``'0'`` are appended to ``list`` and written to the
    output file. The crawl then continues from the first entry of ``list``
    that is not yet in ``markedList`` until every entry has been visited.

    Args:
        startID: app id the crawl starts from.
        list: app ids collected so far; mutated in place (the name shadows
            the builtin but is kept for caller compatibility).
        markedList: app ids already expanded; mutated in place.
        output_path: file to append results to. Defaults to the original
            hard-coded thesis list location.
    """
    if output_path is None:
        output_path = r"C:\Users\jake_\OneDrive\Desktop\Macquarie University\Personal Projects\Cybersecurity\Django\three\mysite\ThesisStuff\thesisList.txt"

    exhausted = object()
    # One handle for the whole crawl: the previous version opened the file
    # on every call without closing it and repeated the header each time.
    with open(output_path, "a+") as out:
        out.write("Id,\tCategory,\tPrice\tInstalls\n")
        current = startID
        # Iterative traversal: visiting order matches the old recursion but
        # cannot hit the interpreter's recursion limit on large crawls.
        while True:
            for similar in play_scraper.similar(current):
                app_id = similar.get('app_id')
                # Single request per app instead of four identical ones.
                info = play_scraper.details(app_id)
                price = info.get('price')
                category = info.get('category')
                installs = info.get('installs')
                if 'HEALTH_AND_FITNESS' not in (category or ()) or price != '0':
                    continue
                if app_id in list:
                    print("not adding to list")
                    continue
                out.write(app_id + ",\t" + str(category) + ",\t" + price
                          + ",\t" + str(installs) + "\n")
                # Keep results on disk even if a later request fails.
                out.flush()
                list.append(app_id)
                print("adding to list")

            markedList.append(current)
            current = next(
                (app_id for app_id in list if app_id not in markedList),
                exhausted)
            if current is exhausted:
                break
def get_store_infos(self, id, str):
    """Collect the privacy entries shown on an app's store page.

    Fetches the app details via ``pl.details(id)``, loads the page at
    ``str`` in ``self.driver``, clicks the privacy link and appends one row
    ``[app_id, privacy entries, price, iap, iap_range]`` to
    ``self.apps_privacy_dataset``.

    Returns ``None`` in every case; returns early (without appending) when
    the privacy link does not appear within 10 seconds.
    """
    info = pl.details(id)
    self.driver.get(str)

    try:
        privacy_link = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH, "//a[@jsname='Hly47e']")))
    except Exception:
        return None
    privacy_link.click()

    # Poll for the list items, at most WAIT_CYCLE extra lookups 5 ms apart.
    item_xpath = "//li[@class='BCMWSd']"
    items = self.driver.find_elements_by_xpath(item_xpath)
    polls = 0
    while items == [] and polls < WAIT_CYCLE:
        time.sleep(0.005)
        items = self.driver.find_elements_by_xpath(item_xpath)
        polls += 1

    # Strip the wrapping <li>/<span> markup, keeping only the inner content.
    wrapper_markup = re.compile('(<li class="BCMWSd"><span>|</span></li>)')
    entries = []
    for element in items:
        entries.append(
            wrapper_markup.sub("", element.get_attribute("outerHTML")))

    row = [
        info.get('app_id'),
        entries,
        info.get('price'),
        info.get("iap"),
        info.get("iap_range"),
    ]
    self.apps_privacy_dataset.append(row)
def request_data_from_google(args):
    """
    Retrieve app data from the Google Play Store via the play_scraper
    from pypi

    Params:
        args: an iterable of valid google package names, or a single
              package name given as a ``str`` (previously a bare string
              was iterated character by character).

    Returns:
        A list with one entry per package; each entry is a list of
        single-key dicts in a fixed order (title, category,
        average_user_rating, review_count, last_updated, installs,
        current_version, package_name, minimum_os_version).
    """
    if isinstance(args, str):
        args = [args]

    # (output key, key in the play_scraper response), in output order.
    field_map = (
        ('title', 'title'),
        ('category', 'category'),
        ('average_user_rating', 'score'),
        ('review_count', 'reviews'),
        ('last_updated', 'updated'),
        ('installs', 'installs'),
        ('current_version', 'current_version'),
        ('package_name', 'app_id'),
        ('minimum_os_version', 'required_android_version'),
    )

    all_responses = []
    for package_name in args:
        response = play_scraper.details(package_name)
        all_responses.append(
            [{out_key: response[src_key]} for out_key, src_key in field_map])
    return all_responses
def check_play_store(app, play_store_id):
    """
    Check Google PlayStore for updates.

    Reads the ``updated`` field of the store listing, normalises it to
    ``day/month/year`` and writes it to the data store when it differs from
    the value recorded in ``APP_DATA_MAP``. When the listing carries no
    ``updated`` value the failure is logged and appended to ``FAILURES``.

    :param app: str
        Name of the application.
    :param play_store_id: str
        Application identifier.
    :return: None
    """
    # play_scraper's details() already does the scraping for us.
    updated = details(play_store_id).get('updated', None)

    if not updated:
        play_store_url = cfg.BASE_PLAY_STORE_URL.format(id=play_store_id)
        LOGGER.error('App=%s - %s - did not fetch any results.', app,
                     play_store_url)
        FAILURES.append(
            'App={app} - {url} - did not fetch any results.'.format(
                app=app, url=play_store_url))
        return

    parsed = parse(updated)
    latest = '{dd}/{mm}/{yyyy}'.format(dd=str(parsed.day),
                                       mm=str(parsed.month),
                                       yyyy=str(parsed.year))
    LOGGER.info('PLAY STORE: {app} - {upd}'.format(app=app, upd=latest))

    stored = APP_DATA_MAP[app].get('play_store', None)
    if not stored or stored != latest:
        update_data_store(app, 'play_store', latest)
def main():
    """Fetch the WhatsApp store details and export them as CSV and JSON."""
    package = 'com.whatsapp'
    # Base name only; no need to specify a filename extension.
    output_name = 'contextual_features_whatsapp'

    formatted = format_dictionary(play_scraper.details(package))
    write_to_csv(output_name, formatted)
    write_to_json(output_name, formatted)
def category(app):
    """Return the store category of ``app`` as a non-empty string.

    ``app`` is a mapping with ``pkg_name`` and ``markets`` entries. The
    market decides which scraper is used (Google Play, anzhi or appchina);
    anzhi and appchina categories are passed through ``translate``.

    Raises:
        ValueError: the market is unknown, the scraper failed (the original
            exception is attached as ``__cause__``), or the resulting
            category is blank.
    """
    package_name = app['pkg_name']
    store = app['markets']
    sleep(0.1)  # small pause before every lookup

    if 'play.google.com' in store:
        try:
            details = play_scraper.details(package_name)
            result = ' '.join(details['category'])
        except Exception as e:
            raise ValueError('Exception in play_scraper') from e
    elif 'anzhi' in store:
        try:
            details = anzhi_scraper.details(package_name)
            translated = translate(details['category'])
            prefix = 'Category: '
            # Drop the leading label when the translated text carries one.
            if translated.startswith(prefix):
                result = translated[len(prefix):]
            else:
                result = translated
        except Exception as e:
            raise ValueError('Exception scraping {} from anzhi: {}'.format(
                package_name, e)) from e
    elif 'appchina' in store:
        try:
            details = appchina_scraper.details(package_name)
            result = translate(details['category'])
        except Exception as e:
            raise ValueError('Exception scraping {} from Appchina: {}'.format(
                package_name, e)) from e
    else:
        raise ValueError('No data for the store {}'.format(store))

    if not result.strip():
        raise ValueError('Empty category')
    return result
def get_app_details_using_id(id):
    """Return ``(title, category)`` for the app with the given id.

    The category is rendered as a comma-separated string, e.g.
    ``"GAME_ACTION, FAMILY"``. A missing category yields ``""`` and a plain
    string is returned unchanged; the earlier ``str(value)[1:-1]`` trick
    chopped the first and last characters off such values.
    """
    app_detail = play_scraper.details(id)
    title = app_detail['title']
    category = app_detail['category']

    if category is None:
        category_text = ''
    elif isinstance(category, (list, tuple)):
        category_text = ', '.join(str(entry) for entry in category)
    else:
        category_text = str(category)
    return title, category_text
def grab_application_name_description_icon(package_name, online_lookup):
    """
        Look up an application's description and icon link on Google Play.

        @param package_name : package name
        @param online_lookup : when falsy no request is made and the
                               "not found" placeholders are returned
        @rtype : (description, icon) string tuple; both elements are
                 ERROR_APP_DESC_NOT_FOUND when the lookup is disabled, the
                 package is unknown or no details were returned
    """
    not_found = (ERROR_APP_DESC_NOT_FOUND, ERROR_APP_DESC_NOT_FOUND)
    if not online_lookup:
        return not_found

    try:
        app_details = play_scraper.details(package_name)
    except ValueError:
        # Logger.warn is a deprecated alias of Logger.warning.
        log.warning("'%s' application name does not exist on Google Play" % str(package_name))
        return not_found

    if not app_details:
        log.warning("'%s' application's description and icon could not be found in the page" % str(package_name))
        return not_found

    desc = app_details.get('description', ERROR_APP_DESC_NOT_FOUND)
    icon_link = app_details.get('icon', ERROR_APP_DESC_NOT_FOUND)
    return desc, "Icon link: %s" % icon_link
def playstore(request):
    """Return Play Store information for ``?package_val=<id>`` as JSON.

    On success the response carries title, icon, rating, reviews, a
    description truncated to 200 characters, author, developer link, url
    and installs. A missing parameter or a scraper failure produces a
    ``{'error': ...}`` payload instead of an unhandled exception.
    """
    values = dict(request.GET).get('package_val')
    if not values:
        return JsonResponse({'error': 'Missing package_val parameter'})

    try:
        d = play_scraper.details(values[0])
    except Exception as e:
        return JsonResponse({'error': str(e)})

    # Listings may lack optional fields; avoid TypeError on None values.
    description = d['description'] or ''
    if len(description) > 200:
        description = description[:200] + ' ...'
    score = d['score']

    context = {
        'title': d['title'],
        'icon': d['icon'],
        'rating': float(score) if score is not None else None,
        'reviews': d['reviews'],
        'description': description,
        'author': d['developer'],
        'developerLink': d['developer_url'],
        'url': d['url'],
        'installs': 'Installs: ' + (d['installs'] or ''),
    }
    return JsonResponse(context)
def get_app_info():
    """Return basic Play Store details for the ``app_url`` query argument.

    Returns a ``(json_body, status)`` pair:
      * 400 when ``app_url`` is missing or has no usable ``id`` parameter,
      * 404 when the store lookup fails (previously an unhandled error),
      * 200 with app_id, title, icon, video, price and description.
    """
    app_url = request.args.get('app_url')
    if app_url is None:
        return "{\"error\": \"You need to specify an app_url argument\"}", 400

    try:
        query = urlparse.urlparse(app_url).query
        app_id = urlparse.parse_qs(query)['id'][0]
    except Exception:
        return "{\"error\": \"The Playstore URL is invalid\"}", 400

    try:
        app_details = play_scraper.details(app_id)
    except Exception:
        # Consistent with the other failure paths: answer with a JSON error.
        return "{\"error\": \"The app could not be found on the Playstore\"}", 404

    app_json = {
        "app_id": app_details.get('app_id'),
        "title": app_details.get('title'),
        'icon': app_details.get('icon'),
        "video": app_details.get('video'),
        "price": app_details.get('price'),
        "description": app_details.get('description_html')
    }
    return str(json.dumps(app_json)), 200
def grab_application_name_description_icon(package_name, online_lookup):
    """
        Fetch an application's description and icon link from Google Play.

        @param package_name : package name
        @param online_lookup : falsy to skip the lookup entirely
        @rtype : (description, icon) string tuple; both entries are
                 ERROR_APP_DESC_NOT_FOUND when nothing could be retrieved
    """
    fallback = (ERROR_APP_DESC_NOT_FOUND, ERROR_APP_DESC_NOT_FOUND)
    if not online_lookup:
        return fallback

    try:
        app_details = play_scraper.details(package_name)
        if not app_details:
            log.warning(
                "'%s' application's description and icon could not be found in the page"
                % str(package_name))
            return fallback

        description = app_details.get('description',
                                      ERROR_APP_DESC_NOT_FOUND)
        icon = app_details.get('icon', ERROR_APP_DESC_NOT_FOUND)
        return description, "Icon link: %s" % icon
    except ValueError:
        log.warning("'%s' application name does not exist on Google Play" %
                    str(package_name))
        return fallback
def list_of_details_of_collection(category):
    """Dump developer details of every app in ``category`` to a text file.

    For each entry of ``COLLECTIONS`` up to 42 result pages are requested;
    paging for a collection stops at the first failing request. Each app is
    written to ``data/<category>.txt`` as one line holding the ``str()`` of
    ``{app_id: [title, developer_id, installs, developer_url,
    developer_email]}``.
    """
    # The context manager guarantees the file is flushed and closed, which
    # the previous version never did.
    with open('data/' + category + '.txt', 'w') as f:
        print('Finding results for ' + category)
        for collection in COLLECTIONS:
            for page in range(0, 42):
                try:
                    scraper = play_scraper.collection(collection=collection,
                                                      category=category,
                                                      results=120,
                                                      page=page)
                except Exception:
                    # Treated as "no more pages" for this collection.
                    break

                list_of_ids = [item['app_id'] for item in scraper]
                for app_id in list_of_ids:
                    a = play_scraper.details(app_id)
                    b = {
                        a['app_id']: [
                            a['title'], a['developer_id'], a['installs'],
                            a['developer_url'], a['developer_email']
                        ]
                    }
                    try:
                        f.write(str(b) + '\n')
                    except Exception:
                        # Give up on the rest of this page if a line cannot
                        # be written.
                        break
    print(category + ' Done\n')
def _scrape_play_store(app):
    """Return the Play Store details for ``app``, or ``None`` on any error."""
    found = None
    try:
        found = play_scraper.details(app)
        logger.info('Found Play Store data for %s' % app)
    except Exception:
        logger.warning('Exception while scraping for %s' % app)
        found = None
    return found
def get_by_id(self, app_id, lang, country):
    """Look up one app and wrap the result in ``AppDetails``.

    ``lang`` and ``country`` are forwarded positionally to
    ``play_scraper.details``. Returns ``None`` when the lookup raises
    ``ValueError``.
    """
    try:
        found = play_scraper.details(app_id, lang, country)
    except ValueError:
        return None
    return AppDetails(found)
def grab_application_detail(package_name):
    """Return the Play Store details for ``package_name``.

    Note the two return shapes: the details object on success, but a pair
    of ``ERROR_APP_DESC_NOT_FOUND`` placeholders when the lookup raises
    ``ValueError``.
    """
    try:
        return play_scraper.details(package_name)
    except ValueError:
        log.warning("'%s' application name does not exist on Google Play" %
                    str(package_name))
    return ERROR_APP_DESC_NOT_FOUND, ERROR_APP_DESC_NOT_FOUND
def getapp():
    """Return title, icon and developer e-mail of ``?appId=<id>`` as JSON.

    Any lookup failure yields a result whose three fields are ``None``.
    """
    appId = request.args.get('appId', None)
    try:
        x = play_scraper.details(appId)
        res = ScraperResult(x.get('title'), x.get('icon'),
                            x.get('developer_email'))
    except Exception:
        # Not a bare ``except``: interpreter-exit signals must propagate.
        res = ScraperResult(None, None, None)
    return json.dumps(res.__dict__)
def get_app_details(package_id):
    """Get App Details from PlayStore.

    Returns the details dict without ``description_html`` and with
    ``error`` set to ``False``; on any failure returns ``{'error': True}``
    and logs the cause.
    """
    try:
        logger.info('Fetching Details from Play Store: %s', package_id)
        det = play_scraper.details(package_id)
        det.pop('description_html', None)
        det['error'] = False
    except Exception as exp:
        # Include the cause so failed lookups can be diagnosed from the log.
        logger.warning('Unable to get app details. %s', exp)
        det = {'error': True}
    return det
def details(pkname):
    """Return the ``developer_id`` of package ``pkname``.

    Returns ``None`` when the listing has no ``developer_id``, or when the
    lookup fails (the error is printed).
    """
    try:
        dev = play_scraper.details(pkname)
        # Direct lookup instead of scanning every key of the result.
        return dev.get('developer_id')
    except Exception as e:
        print(e)
        return None
def get_data_from_play_store(application_id):
    """Fetch an app from the Play Store and map it via
    ``map_app_from_play_store``.

    Any exception is logged and then re-raised to the caller.
    """
    try:
        raw_data = play_scraper.details(application_id)
        return map_app_from_play_store(raw_data)
    except Exception as err:
        logger.error("Error getting data from the play store: {0}".format(err))
        raise err
def _get_play_store_app_data(url):
    """Return the play_scraper details for the app referenced by ``url``.

    The app id is taken from the URL by ``__parse_store_app_url``. Every
    failure of the details lookup is reported as
    ``GetStoreDataItemNotFound`` with the original error chained.
    """
    app_id = __parse_store_app_url(url, "play.google.com", "id")
    try:
        # XXX: a 'summary' value is missing in play_scraper's result; check
        # whether its code can feasibly be modified to provide it.
        return play_scraper.details(app_id)
    except Exception as err:
        # Any communication error is assumed to mean "item not found";
        # could be improved by inspecting the error thrown.
        raise GetStoreDataItemNotFound() from err
def get_app_description(packageName):
    """Return the Play Store description for ``packageName``.

    When the direct lookup raises ``ValueError`` a detailed search for the
    same text is made and the description of the first hit is returned.
    Every other failure (including an empty search) yields ``''``.
    """
    try:
        return play_scraper.details(packageName)['description']
    except ValueError:
        pass  # unknown package id: fall through to the search below
    except Exception:
        return ''

    try:
        matches = play_scraper.search(packageName, detailed=True)
        # search() returns a list of results; indexing it with a string key
        # (as before) always raised, so the fallback never produced a value.
        return matches[0]['description']
    except Exception:
        return ''
def get_app_details(package_id):
    '''Get App Details from PlayStore.

    Returns the details dict (minus ``description_html``) flagged with
    ``error: False``, or ``{"error": True}`` when anything goes wrong.
    '''
    try:
        logger.info("Fetching Details from Play Store: %s", package_id)
        store_details = play_scraper.details(package_id)
        store_details.pop('description_html', None)
        store_details["error"] = False
        return store_details
    except Exception as exp:
        logger.warning("Unable to get app details. %s", exp)
        return {"error": True}
def __init__(self, package_id):
    """Store the package id and try to load its Play Store title/developer.

    ``self._play`` records whether the lookup succeeded; on failure
    ``_title`` and/or ``_dev`` may be left unset.
    """
    super().__init__()
    self._id = package_id

    # Attempt to get Play Store info
    try:
        store_info = play_scraper.details(self._id)
        self._title = store_info["title"]
        self._dev = store_info["developer"]
    except Exception:
        self._play = False
    else:
        self._play = True
def get_app_category(pkg_name: str) -> str:
    """\
    Returns the category of the app in play store given its package name
    pkg_name.

    The first entry of the listing's category list is used. On any lookup
    error a message is printed and an empty string is returned.
    """
    category = ''
    try:
        category = play_scraper.details(pkg_name)['category'][0]
    except Exception:
        # Not a bare ``except``: Ctrl-C and SystemExit must still propagate.
        print("exception occurred in retrieving app category")
    return category
def app_details(app_id, retries=3):
    """Fetch app details, retrying transient failures with a cubic backoff.

    Args:
        app_id: package id to look up.
        retries: maximum number of attempts (default 3, as before).

    Returns:
        The result of ``details(app_id)``, or ``None`` when the app is not
        found (``HTTPError``/``ValueError``) or every attempt failed.
    """
    for attempt in range(retries):
        try:
            return details(app_id)
        except (ReadTimeout, ConnectionError):
            problem = "ReadTimeout error"
        except (HTTPError, ValueError):
            print("url for " + str(app_id) + " not found")
            return None
        except AttributeError:
            problem = "AttributeError"

        if attempt + 1 >= retries:
            # No attempt follows, so do not sleep (this used to waste the
            # longest delay right before returning).
            print(problem + ", giving up after " + str(retries) + " attempts.")
            return None

        delay = attempt ** 3
        print(problem + ", waiting for " + str(delay) + " seconds.")
        time.sleep(delay)
    return None
def home():
    """Render the start page with details of the apps from ``app_fetcher``.

    For each app the Play Store details are fetched, ``None`` values are
    replaced by placeholders, the description is capped at 1000 characters
    and, when the app is not yet present in table ``myapp``, a row is
    inserted. The connection is committed and always closed.
    """
    if os.environ.get('GAE_ENV') == 'standard':
        unix_socket = '/cloudsql/{}'.format(db_connection_name)
        db = pymysql.connect(user=db_user, password=db_password,
                             unix_socket=unix_socket, db=db_name,
                             charset="utf8mb4")
    else:
        # Local runs talk TCP: the address is a host, not a socket path.
        db = pymysql.connect(user=db_user, password=db_password,
                             host='127.0.0.1', db=db_name,
                             charset="utf8mb4")

    top10appshow = []
    try:
        top10app = app_fetcher()
        cursor = db.cursor()
        for myapp in top10app:
            # Parameterized queries: scraped text may contain quotes.
            cursor.execute("SELECT app_id from myapp where app_id = %s;",
                           (myapp,))
            result = cursor.fetchall()

            app_info = play_scraper.details(myapp)
            for key, val in app_info.items():
                if val is None:
                    if key == 'video':
                        app_info[key] = 'https://www.youtube.com/watch?v=B-3yZwaGD_k'
                    else:
                        app_info[key] = "OPPS!"
                if key == 'description' and len(str(app_info[key])) > 1000:
                    app_info[key] = str(app_info[key][:1000])
            top10appshow.append(app_info)

            if len(result) == 0:
                try:
                    cursor.execute(
                        "insert into myapp(app_id,category,description,developer,developer_address,developer_email,icon,installs,reviews,score,title,url,video) "
                        "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);",
                        (app_info['app_id'], app_info['category'][0],
                         # NOTE(review): the literal text "description" is
                         # stored, exactly as before; confirm whether the
                         # real description should be saved instead.
                         "description",
                         app_info['developer'],
                         app_info['developer_address'],
                         app_info['developer_email'], app_info['icon'],
                         app_info['installs'], app_info['reviews'],
                         app_info['score'], app_info['title'],
                         app_info['url'], app_info['video']))
                except Exception as exc:
                    # Best effort: report and continue with the next app.
                    print(exc)
                    print(app_info['description'])
        db.commit()
    finally:
        db.close()
    return render_template('startup_page.html', data=top10appshow)
def select_app(self, book, event):
    """Load the details of the chosen app and switch to the "App" screen.

    ``book`` supplies the ``app_id``; ``event`` is not used. The details
    are kept on ``self.dict`` and handed to the app page before the screen
    manager slides left to the "App" screen.
    """
    # An earlier RapidAPI-based lookup (app-stores.p.rapidapi.com) was
    # replaced by the direct play_scraper call below.
    selected_id = book.get("app_id")
    self.dict = play_scraper.details(selected_id)
    sentimento.app_page.showdetails(self.dict)

    manager = sentimento.screen_manager
    manager.transition.direction = "left"
    manager.current = "App"
def get(self):
    """Render the start page and store unseen apps as ``App`` entities.

    For each app from ``app_fetcher`` the Play Store details are fetched,
    ``None`` values are replaced by placeholders and the description is
    capped at 1000 characters. Apps without an existing ``App`` entity are
    written to the datastore.
    """
    top10app = app_fetcher()
    top10appshow = []
    client = datastore.Client('cloudapp-280210')

    # Fields copied verbatim from the details onto the new entity.
    copied_fields = ('description', 'developer', 'developer_address',
                     'developer_email', 'icon', 'installs', 'reviews',
                     'score', 'title', 'url', 'video')

    for myapp in top10app:
        query = client.query(kind='App')
        query.add_filter('app_id', '=', myapp)
        results = list(query.fetch())

        app_info = play_scraper.details(myapp)
        for key, val in app_info.items():
            if val is None:
                if key == 'video':
                    app_info[key] = 'https://www.youtube.com/watch?v=B-3yZwaGD_k'
                else:
                    app_info[key] = "OPPS!"
            if key == 'description' and len(str(app_info[key])) > 1000:
                app_info[key] = str(app_info[key][:1000])
        top10appshow.append(app_info)

        if len(results) == 0:
            try:
                new_app = datastore.Entity(key=client.key('App'))
                new_app['app_id'] = app_info['app_id']
                new_app['category'] = app_info['category'][0]
                # 'title' is part of copied_fields: it used to be assigned
                # back onto the details dict and never reached the entity.
                for field in copied_fields:
                    new_app[field] = app_info[field]
                client.put(new_app)
            except Exception:
                # Best effort: report and continue with the next app.
                print(app_info['description'])

    data = top10appshow
    path = os.path.join(os.path.dirname(__file__), 'startup_page.html')
    # NOTE(review): `data` is a list; confirm template.render accepts it as
    # the template values (a mapping such as {'data': data} may be meant).
    self.response.out.write(template.render(path, data))
def rescrape_data():
    """
        Re scrapes data from play store to datastore db

        The page at PLAY_STORE_URL is parsed for category blocks; the first
        three apps of each block (in reversed order) are looked up with
        play_scraper and saved as ``Application`` entities, updating
        existing ones that match by ``app_id``.
    """
    category_to_app_ids_mapping = {}
    app_id_to_obj_mapping = {}
    app_ids = []

    # Make play store api call
    r = urlfetch.fetch(PLAY_STORE_URL)
    soup = BeautifulSoup(r.content, 'html.parser')

    for category in soup.find_all('div', class_="Ktdaqe"):
        top_apps_in_category = category.find_all('div', class_='vU6FJ p63iDd')
        top_apps_in_category = top_apps_in_category[0:3][::-1]
        name = category.find('div', class_='xwY9Zc').text
        if not category_to_app_ids_mapping.get(name):
            category_to_app_ids_mapping[name] = []
        for item in top_apps_in_category:
            app_link = item.a['href']
            # The app id is the value following "id=" in the query string.
            app_id = app_link.split('?')[1].split('id=')[1]
            app_ids.append(app_id)
            category_to_app_ids_mapping[name].append(app_id)

    # Load already stored entities so they are updated, not duplicated.
    query_result = Application.all().filter('app_id IN', app_ids)
    for obj in query_result:
        app_id_to_obj_mapping[obj.app_id] = obj

    for key, value in category_to_app_ids_mapping.items():
        for app_id in value:
            app_details = play_scraper.details(app_id)
            app_obj = app_id_to_obj_mapping.get(app_id)
            if not app_obj:
                app_obj = Application(app_id=app_id)

            app_obj.category = key
            app_obj.developer = app_details['developer']
            app_obj.title = app_details['title']
            app_obj.icon = app_details['icon']

            # Set the video first: the screenshot cap depends on it, and it
            # used to be evaluated against the stale (or unset) value.
            video = app_details['video']
            app_obj.video = video
            screenshots = app_details['screenshots']
            if screenshots:
                # Keep at most 4 screenshots next to a video, 5 otherwise.
                screenshots = screenshots[:4 if video else 5]
            app_obj.screenshots = screenshots

            app_obj.score = app_details['score']
            app_obj.installs = app_details['installs']
            app_obj.description = app_details['description']
            app_obj.put()
def getAppsData(sheet, detailed):
    """Interactively match each app name in ``sheet`` to a Play Store app.

    Rows 1..nrows-1 are read (row 0 is skipped); column 0 holds the app
    name. For every name the search results are displayed and the user
    picks one by its 1-based number, or 0 to skip. A non-numeric or
    out-of-range answer is treated as a skip instead of crashing or
    silently selecting the wrong entry.

    Args:
        sheet: object exposing ``nrows`` and ``cell_value(row, col)``.
        detailed: when truthy the detailed search result itself is saved;
            otherwise the full details are fetched for the chosen app.

    Returns:
        List with the saved data of every selected app.
    """
    app_details = []
    # Forward ``detailed`` only when set, so the plain path keeps relying
    # on the callees' own defaults exactly as before.
    extra = {'detailed': detailed} if detailed else {}

    for row in range(1, sheet.nrows):
        app_name = sheet.cell_value(row, 0)
        print("\nSearching Apps for " + app_name + "...")
        searchResults = play_scraper.search(app_name, page=1, **extra)
        print("\nSearch Results for " + app_name + ": \n")
        displayTable(searchResults, noOfRowsToDisplay=10, **extra)

        answer = input(
            "\nPlease Select your App (0 -> if your app is not listed): ")
        try:
            selectedApp = int(answer)
        except ValueError:
            selectedApp = -1  # handled as an invalid selection below

        if selectedApp == 0:
            print(app_name + " - SKIPPED!")
            continue
        if not 1 <= selectedApp <= len(searchResults):
            print(app_name + " - SKIPPED! (invalid selection)")
            continue

        chosen = searchResults[selectedApp - 1]
        print("\nSaved Data for " + chosen['title'] + ".")
        if detailed:
            app_details.append(chosen)
        else:
            app_details.append(play_scraper.details(chosen['app_id']))
    return app_details
def scrapeDeveloperScreenShots(developerName):
    """Download every screenshot of a developer's apps.

    Up to 120 apps of ``developerName`` are listed; each screenshot is
    saved in the working directory as ``material<N>``, with ``N`` counting
    up from 0 across all apps.
    """
    developer_apps = scraper.developer(developerName, results = 120)

    # Lazy generator: details are still fetched one app at a time, right
    # before that app's screenshots are downloaded.
    screenshot_urls = (
        url
        for app in developer_apps
        for url in scraper.details(app['app_id'])['screenshots']
    )
    for index, url in enumerate(screenshot_urls):
        urllib.request.urlretrieve(url, 'material' + str(index))