def search(self, query, page=None, detailed=False):
    page = 0 if page is None else int(page)
    if page > len(self._pagtok) - 1:
        raise ValueError(
            'Parameter \'page\' ({page}) must be between 0 and 12.'.format(
                page=page))

    pagtok = self._pagtok[page]
    data = generate_post_data(0, 0, pagtok)

    self.params.update({
        'q': quote_plus(query),
        'c': 'apps',
    })

    response = send_request('POST', self._search_url, data, self.params)
    soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')

    if detailed:
        apps = self._parse_multiple_apps(response)
    else:
        apps = [
            parse_card_info(app)
            for app in soup.select('div[data-uitype="500"]')
        ]

    return apps
def developer(self, developer, results=None, page=None, detailed=False):
    """Sends a POST request and retrieves a list of the developer's
    published applications on the Play Store.

    :param developer: developer name to retrieve apps from, e.g. 'Disney'
    :param results: the number of app results to retrieve
    :param page: the page number to retrieve
    :param detailed: if True, sends request per app for its full detail
    :return: a list of app dictionaries
    """
    if not isinstance(developer, basestring) or developer.isdigit():
        raise ValueError('Parameter \'developer\' must be the developer name, not the developer id.')

    results = s.DEV_RESULTS if results is None else results
    page = 0 if page is None else page
    page_num = (results // 20) * page
    if not 0 <= page_num <= 12:
        raise ValueError('Page out of range. (results // 20) * page must be between 0 - 12')
    pagtok = self._pagtok[page_num]

    url = build_url('developer', developer)
    data = generate_post_data(results, 0, pagtok)
    response = send_request('POST', url, data, self.params)

    if detailed:
        apps = self._parse_multiple_apps(response)
    else:
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        apps = [parse_card_info(app)
                for app in soup.select('div[data-uitype="500"]')]

    return apps
def search(self, query, page=None, detailed=False): """Sends a POST request and retrieves a list of applications matching the query term(s). :param query: search query term(s) to retrieve matching apps :param page: the page number to retrieve. Max is 12. :param detailed: if True, sends request per app for its full detail :return: a list of apps matching search terms """ page = 0 if page is None else int(page) if page > len(self._pagtok) - 1: raise ValueError( "Parameter 'page' ({page}) must be between 0 and 12.".format( page=page)) pagtok = self._pagtok[page] data = generate_post_data(0, 0, pagtok) self.params.update({"q": quote_plus(query), "c": "apps"}) response = send_request("POST", self._search_url, data, self.params) soup = BeautifulSoup(response.content, "lxml", from_encoding="utf8") if detailed: apps = self._parse_multiple_apps(response) else: apps = [ parse_cluster_card_info(app) for app in soup.select("div.Vpfmgd") ] return apps
def categories(self, ignore_promotions=True): """Sends a GET request to the front page (app store base url), parses and returns a list of all available categories. """ categories = {} response = send_request('GET', s.BASE_URL, params=self.params) soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8') category_links = soup.select( 'div[id*="action-dropdown-children"] a[href*="category"]') age_query = '?age=' for cat in category_links: url = urljoin(s.BASE_URL, cat.attrs['href']) category_id = url.split('/')[-1] name = cat.string.strip() if age_query in category_id: category_id = 'FAMILY' url = url.split('?')[0] name = 'Family' if category_id not in categories: if ignore_promotions and '/store/apps/category/' not in url: continue categories[category_id] = { 'name': name, 'url': url, 'category_id': category_id } return categories
def categories(self):
    """Sends a GET request to the front page (app store base url), parses
    and returns a list of all available categories.

    Note: May contain some promotions, e.g. "Popular Characters"
    """
    categories = {}
    strainer = SoupStrainer('a', {'class': 'child-submenu-link'})

    response = send_request('GET', s.BASE_URL)
    soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8',
                         parse_only=strainer)
    category_links = soup.select('a.child-submenu-link')
    age = '?age='

    for cat in category_links:
        url = urljoin(s.BASE_URL, cat.attrs['href'])
        category_id = url.split('/')[-1]
        name = cat.string.strip()

        if age in category_id:
            category_id = 'FAMILY'
            url = url.split('?')[0]
            name = 'Family'

        if category_id not in categories:
            categories[category_id] = {
                'name': name,
                'url': url,
                'category_id': category_id}

    return categories
def categories(self, ignore_promotions=True): """Sends a GET request to the front page (app store base url), parses and returns a list of all available categories. """ categories = {} response = send_request("GET", s.BASE_URL, params=self.params) soup = BeautifulSoup(response.content, "lxml", from_encoding="utf8") category_links = soup.select( 'div[id*="action-dropdown-children"] a[href*="category"]') age_query = "?age=" for cat in category_links: url = urljoin(s.BASE_URL, cat.attrs["href"]) category_id = url.split("/")[-1] name = cat.string.strip() if age_query in category_id: category_id = "FAMILY" url = url.split("?")[0] name = "Family" if category_id not in categories: if ignore_promotions and "/store/apps/category/" not in url: continue categories[category_id] = { "name": name, "url": url, "category_id": category_id, } return categories
def developer(self, developer, results=None, page=None, detailed=False):
    if not isinstance(developer, basestring) or developer.isdigit():
        raise ValueError(
            'Parameter \'developer\' must be the developer name, not the developer id.'
        )

    results = s.DEV_RESULTS if results is None else results
    page = 0 if page is None else page
    page_num = (results // 20) * page
    if not 0 <= page_num <= 12:
        raise ValueError(
            'Page out of range. (results // 20) * page must be between 0 - 12'
        )
    pagtok = self._pagtok[page_num]

    url = build_url('developer', developer)
    data = generate_post_data(results, 0, pagtok)
    response = send_request('POST', url, data, self.params)

    if detailed:
        apps = self._parse_multiple_apps(response)
    else:
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        apps = [
            parse_card_info(app)
            for app in soup.select('div[data-uitype="500"]')
        ]

    return apps
def search(self, query, page=None, detailed=False): """Sends a POST request and retrieves a list of applications matching the query term(s). :param query: search query term(s) to retrieve matching apps :param page: the page number to retrieve. Max is 12. :param detailed: if True, sends request per app for its full detail :return: a list of apps matching search terms """ page = 0 if page is None else int(page) if page > len(self._pagtok) - 1: raise ValueError('Parameter \'page\' ({page}) must be between 0 and 12.'.format( page=page)) pagtok = self._pagtok[page] data = generate_post_data(0, 0, pagtok) self.params.update({ 'q': quote_plus(query), 'c': 'apps', }) response = send_request('POST', self._search_url, data, self.params) soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8') if detailed: apps = self._parse_multiple_apps(response) else: apps = [parse_card_info(app) for app in soup.select('div[data-uitype="500"]')] return apps
def developer(self, developer, results=None, page=None, detailed=False):
    """Sends a POST request and retrieves a list of the developer's
    published applications on the Play Store.

    :param developer: developer name to retrieve apps from, e.g. 'Disney'
    :param results: the number of app results to retrieve
    :param page: the page number to retrieve
    :param detailed: if True, sends request per app for its full detail
    :return: a list of app dictionaries
    """
    if not isinstance(developer, basestring) or developer.isdigit():
        raise ValueError('Parameter \'developer\' must be the developer name, not the developer id.')

    results = s.DEV_RESULTS if results is None else results
    page = 0 if page is None else page
    page_num = (results // 20) * page
    if not 0 <= page_num <= 12:
        raise ValueError('Page out of range. (results // 20) * page must be between 0 - 12')
    pagtok = self._pagtok[page_num]

    url = build_url('developer', developer)
    data = generate_post_data(results, 0, pagtok)
    response = send_request('POST', url, data, self.params)

    if detailed:
        apps = self._parse_multiple_apps(response)
    else:
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        apps = [parse_card_info(app)
                for app in soup.select('div[data-uitype="500"]')]

    return apps
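# Usage sketch for developer() (hypothetical PlayScraper wrapper assumed).
# Note the paging constraint: (results // 20) * page must stay within 0-12.
#
#     scraper = PlayScraper()
#     apps = scraper.developer('Disney', results=40, page=2)   # page_num = 4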
def category_items(self, category, detailed=False):
    """Returns the list of apps shown on a category's main page.

    :param category: the category to fetch, e.g. 'GAME_ACTION'
    :param detailed: if True, sends a request per app for its full detail
    :return: a list of dicts containing 'app_id' keys, or full app details
        when detailed is True
    """
    category_url = build_category_url(category)
    response = send_request("GET", category_url)
    soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
    items_elements = soup.select(self.list_item_selector)
    if not len(items_elements):
        items_elements = soup.select(self.list_item_promo_selector)
    app_ids = [
        get_query_params(element['href'])['id'][0]
        for element in items_elements
    ]
    if not detailed:
        return [{'app_id': app_id} for app_id in app_ids]
    return multi_futures_app_request(app_ids=app_ids)
def categories(self, ignore_promotions=True):
    categories = {}
    strainer = SoupStrainer('ul', {'class': 'submenu-item-wrapper'})

    response = send_request('GET', s.BASE_URL, params=self.params)
    soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8',
                         parse_only=strainer)
    category_links = soup.select('a.child-submenu-link')
    category_links += soup.select('a.parent-submenu-link')
    age_query = '?age='

    for cat in category_links:
        url = urljoin(s.BASE_URL, cat.attrs['href'])
        category_id = url.split('/')[-1]
        name = cat.string.strip()

        if age_query in category_id:
            category_id = 'FAMILY'
            url = url.split('?')[0]
            name = 'Family'

        if category_id not in categories:
            if ignore_promotions and '/store/apps/category/' not in url:
                continue

            categories[category_id] = {
                'name': name,
                'url': url,
                'category_id': category_id
            }

    return categories
def cluster_items(self, gsr, detailed=False):
    """Get cluster page items.

    https://play.google.com/store/apps/collection/cluster?clp=0g4cChoKFHRvcHNlbGxpbmdfZnJlZV9HQU1FEAcYAw%3D%3D:S:ANO1ljJ_Y5U&gsr=Ch_SDhwKGgoUdG9wc2VsbGluZ19mcmVlX0dBTUUQBxgD:S:ANO1ljL4b8c

    :param gsr: cluster id
    :param detailed: if True, sends a request per app for its full detail
    :return: list of app details
    """
    cluster_url = build_cluster_url(gsr=gsr)
    response = send_request("GET", cluster_url)
    soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
    items_elements = soup.select(self.list_item_selector)
    if not len(items_elements):
        items_elements = soup.select(self.list_item_promo_selector)
    app_ids = [
        get_query_params(element['href'])['id'][0]
        for element in items_elements
    ]
    if not detailed:
        return [{'app_id': app_id} for app_id in app_ids]
    return multi_futures_app_request(app_ids=app_ids)
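# Usage sketch for cluster_items(): the gsr token is the 'gsr' query parameter
# of a cluster URL (see the example URL in the docstring above). The
# PlayScraper wrapper class is assumed, not shown in this listing.
#
#     scraper = PlayScraper()
#     gsr = 'Ch_SDhwKGgoUdG9wc2VsbGluZ19mcmVlX0dBTUUQBxgD:S:ANO1ljL4b8c'
#     app_ids = scraper.cluster_items(gsr=gsr)                  # [{'app_id': ...}, ...]
#     apps = scraper.cluster_items(gsr=gsr, detailed=True)      # full details per app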
def search(self, query, page=None, detailed=False): """Sends a POST request and retrieves a list of applications matching the query term(s). :param query: search query term(s) to retrieve matching apps :param page: the page number to retrieve. Max is 12. :param detailed: if True, sends request per app for its full detail :return: a list of apps matching search terms """ page = 0 if page is None else int(page) if page > len(self._pagtok) - 1: raise ValueError('Parameter \'page\' ({page}) must be between 0 and 12.'.format( page=page)) pagtok = self._pagtok[page] data = generate_post_data(0, 0, pagtok) self.params.update({ 'q': quote_plus(query), 'c': 'apps', }) response = send_request('POST', self._search_url, data, self.params) soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8') if detailed: apps = self._parse_multiple_apps(response) else: apps = [parse_card_info(app) for app in soup.select('div[data-uitype=500]')] return apps
def collection(
    self,
    collection_id,
    category_id=None,
    results=None,
    page=None,
    age=None,
    detailed=False,
):
    """Sends a POST request and fetches a list of applications belonging to
    the collection and an optional category.

    :param collection_id: the collection id, e.g. 'NEW_FREE'.
    :param category_id: (optional) the category id, e.g. 'GAME_ACTION'.
    :param results: the number of apps to retrieve at a time.
    :param page: page number to retrieve; limitation: page * results <= 500.
    :param age: an age range to filter by (only for FAMILY categories)
    :param detailed: if True, sends request per app for its full detail
    :return: a list of app dictionaries
    """
    if collection_id not in COLLECTIONS and not collection_id.startswith(
            "promotion"):
        raise ValueError("Invalid collection_id '{collection}'.".format(
            collection=collection_id))
    collection_name = COLLECTIONS.get(collection_id) or collection_id

    category = "" if category_id is None else CATEGORIES.get(category_id)
    if category is None:
        raise ValueError("Invalid category_id '{category}'.".format(
            category=category_id))

    results = s.NUM_RESULTS if results is None else results
    if results > 120:
        raise ValueError("Number of results cannot be more than 120.")

    page = 0 if page is None else page
    if page * results > 500:
        raise ValueError(
            "Start (page * results) cannot be greater than 500.")

    if category.startswith("FAMILY") and age is not None:
        self.params["age"] = AGE_RANGE[age]

    url = build_collection_url(category, collection_name)
    data = generate_post_data(results, page)
    response = send_request("POST", url, data, self.params)

    if detailed:
        apps = self._parse_multiple_apps(response)
    else:
        soup = BeautifulSoup(response.content, "lxml", from_encoding="utf8")
        apps = [
            parse_card_info(app_card)
            for app_card in soup.select('div[data-uitype="500"]')
        ]

    return apps
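# Usage sketch for collection() (hypothetical PlayScraper wrapper assumed).
# Valid ids come from the COLLECTIONS and CATEGORIES mappings referenced above;
# 'NEW_FREE' and 'GAME_ACTION' are the examples given in the docstring.
#
#     scraper = PlayScraper()
#     apps = scraper.collection('NEW_FREE', 'GAME_ACTION', results=60, page=1)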
def test_request_with_params(self):
    method = 'GET'
    params = {'q': 'google play store'}
    response = send_request(method, self.url, params=params)
    expected_url = "{base}{params}".format(
        base=self.url, params='?q=google+play+store')
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.url, expected_url)
def test_request_with_params(self): method = "GET" params = {"q": "google play store"} response = send_request(method, self.url, params=params) expected_url = "{base}{params}".format(base=self.url, params="?q=google+play+store") self.assertEqual(response.status_code, 200) self.assertEqual(response.url, expected_url)
def reviews(self, app_id, page=1):
    """Sends a POST request and retrieves a list of reviews for
    the specified app.

    :param app_id: the app to retrieve details from, e.g. 'com.nintendo.zaaa'
    :param page: the page number to retrieve; max is 10
    :return: a list of reviews
    """
    data = {
        'reviewType': 0,
        'pageNum': page,
        'id': app_id,
        'reviewSortOrder': 4,
        'xhr': 1,
        'hl': self.language
    }
    self.params['authuser'] = '******'

    response = send_request('POST', s.REVIEW_URL, data, self.params)
    content = response.text
    content = content[content.find('[["ecr"'):].strip()
    data = json.loads(content)
    html = data[0][2]

    soup = BeautifulSoup(html, 'lxml', from_encoding='utf8')

    reviews = []
    for element in soup.select('.single-review'):
        review = {}

        avatar_style = element.select_one('.author-image').get('style')
        if avatar_style:
            sheet = cssutils.css.CSSStyleSheet()
            sheet.add('tmp { %s }' % avatar_style)
            review['author_image'] = list(cssutils.getUrls(sheet))[0]

        review_header = element.select_one('.review-header')
        review['review_id'] = review_header.get('data-reviewid', '')
        review['review_permalink'] = review_header.select_one(
            '.reviews-permalink').get('href')
        review['author_name'] = review_header.select_one('.author-name').text
        review['review_date'] = review_header.select_one('.review-date').text

        curr_rating = review_header.select_one('.current-rating').get('style')
        review['current_rating'] = int(
            int(str(cssutils.parseStyle(curr_rating).width).replace(
                '%', '')) / 20)

        body_elem = element.select_one('.review-body')
        review_title = body_elem.select_one('.review-title').extract()
        body_elem.select_one('.review-link').decompose()
        review['review_title'] = review_title.text
        review['review_body'] = body_elem.text

        reviews.append(review)

    return reviews
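# Usage sketch for reviews() (hypothetical PlayScraper wrapper assumed). Each
# returned dict carries the fields parsed above (review_id, author_name,
# current_rating, review_body, ...).
#
#     scraper = PlayScraper()
#     reviews = scraper.reviews('com.nintendo.zaaa', page=2)   # pages 1-10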
def suggestions(self, query):
    if not query:
        raise ValueError("Cannot get suggestions for an empty query.")

    self.params.update({
        'json': 1,
        'c': 0,
        'query': query,
    })

    response = send_request('GET', self._suggestion_url, params=self.params)
    suggestions = [q['s'] for q in response.json()]

    return suggestions
def suggestions(self, query): """Sends a GET request and retrieves a list of autocomplete suggestions matching the query term(s). :param query: search query term(s) to retrieve autocomplete suggestions :return: a list of suggested search queries, up to 5 """ if not query: raise ValueError("Cannot get suggestions for an empty query.") params = {'json': 1, 'c': 0, 'hl': 'en', 'gl': 'us', 'query': query} response = send_request('GET', self._suggestion_url, params=params) suggestions = [q['s'] for q in response.json()] return suggestions
def collection(self, collection_id, category_id=None, results=None, page=None, age=None, detailed=False): """Sends a POST request and fetches a list of applications belonging to the collection and an optional category. :param collection_id: the collection id, e.g. 'NEW_FREE'. :param category_id: (optional) the category id, e.g. 'GAME_ACTION'. :param results: the number of apps to retrieve at a time. :param page: page number to retrieve; limitation: page * results <= 500. :param age: an age range to filter by (only for FAMILY categories) :param detailed: if True, sends request per app for its full detail :return: a list of app dictionaries """ if (collection_id not in COLLECTIONS and not collection_id.startswith('promotion')): raise ValueError('Invalid collection_id \'{collection}\'.'.format( collection=collection_id)) collection_name = COLLECTIONS.get(collection_id) or collection_id category = '' if category_id is None else CATEGORIES.get(category_id) if category is None: raise ValueError('Invalid category_id \'{category}\'.'.format( category=category_id)) results = s.NUM_RESULTS if results is None else results if results > 120: raise ValueError('Number of results cannot be more than 120.') page = 0 if page is None else page if page * results > 500: raise ValueError('Start (page * results) cannot be greater than 500.') if category.startswith('FAMILY') and age is not None: self.params['age'] = AGE_RANGE[age] url = build_collection_url(category, collection_name) data = generate_post_data(results, page) response = send_request('POST', url, data, self.params) if detailed: apps = self._parse_multiple_apps(response) else: soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8') apps = [parse_card_info(app_card) for app_card in soup.select('div[data-uitype="500"]')] return apps
def collection(self, collection_id, category_id=None, results=None,
               page=None, age=None, detailed=False):
    if (collection_id not in COLLECTIONS
            and not collection_id.startswith('promotion')):
        raise ValueError('Invalid collection_id \'{collection}\'.'.format(
            collection=collection_id))
    collection_name = COLLECTIONS.get(collection_id) or collection_id

    category = '' if category_id is None else CATEGORIES.get(category_id)
    if category is None:
        raise ValueError('Invalid category_id \'{category}\'.'.format(
            category=category_id))

    results = s.NUM_RESULTS if results is None else results
    if results > 120:
        raise ValueError('Number of results cannot be more than 120.')

    page = 0 if page is None else page
    if page * results > 500:
        raise ValueError(
            'Start (page * results) cannot be greater than 500.')

    if category.startswith('FAMILY') and age is not None:
        self.params['age'] = AGE_RANGE[age]

    url = build_collection_url(category, collection_name)
    data = generate_post_data(results, page)
    response = send_request('POST', url, data, self.params)

    if detailed:
        apps = self._parse_multiple_apps(response)
    else:
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
        apps = [
            parse_card_info(app_card)
            for app_card in soup.select('div[data-uitype="500"]')
        ]

    return apps
def suggestions(self, query): """Sends a GET request and retrieves a list of autocomplete suggestions matching the query term(s). :param query: search query term(s) to retrieve autocomplete suggestions :return: a list of suggested search queries, up to 5 """ if not query: raise ValueError("Cannot get suggestions for an empty query.") self.params.update({"json": 1, "c": 0, "query": query}) response = send_request("GET", self._suggestion_url, params=self.params) suggestions = [q["s"] for q in response.json()] return suggestions
def details(self, app_id):
    url = build_url('details', app_id)

    try:
        response = send_request('GET', url, params=self.params)
        soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
    except requests.exceptions.HTTPError as e:
        raise ValueError('Invalid application ID: {app}. {error}'.format(
            app=app_id, error=e))

    app_json = parse_app_details(soup)
    app_json.update({
        'app_id': app_id,
        'url': url,
    })

    return app_json
def similar(self, app_id, detailed=False, **kwargs):
    url = build_url('similar', app_id)
    response = send_request('GET', url, params=self.params,
                            allow_redirects=True)
    soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')

    if detailed:
        apps = self._parse_multiple_apps(response)
    else:
        apps = [
            parse_card_info(app)
            for app in soup.select('div[data-uitype="500"]')
        ]

    return apps
def similar(self, app_id, detailed=False): """Sends a GET request, follows the redirect, and retrieves a list of applications similar to the specified app. :param app_id: the app to retrieve details from, e.g. 'com.nintendo.zaaa' :param detailed: if True, sends request per app for its full detail :return: a list of similar apps """ url = build_url('similar', app_id) response = send_request('GET', url, allow_redirects=True) soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8') if detailed: apps = self._parse_multiple_apps(response) else: apps = [self._parse_card_info(app) for app in soup.select('div[data-uitype=500]')] return apps
def details(self, app_id): """Sends a GET request and parses an application's details. :param app_id: the app to retrieve details, e.g. 'com.nintendo.zaaa' :return: a dictionary of app details """ url = build_url("details", app_id) try: response = send_request("GET", url, params=self.params) soup = BeautifulSoup(response.content, "lxml", from_encoding="utf8") except requests.exceptions.HTTPError as e: raise ValueError("Invalid application ID: {app}. {error}".format( app=app_id, error=e)) app_json = parse_app_details(soup) app_json.update({"app_id": app_id, "url": url}) return app_json
def suggestions(self, query): """Sends a GET request and retrieves a list of autocomplete suggestions matching the query term(s). :param query: search query term(s) to retrieve autocomplete suggestions :return: a list of suggested search queries, up to 5 """ if not query: raise ValueError("Cannot get suggestions for an empty query.") self.params.update({ 'json': 1, 'c': 0, 'query': query, }) response = send_request('GET', self._suggestion_url, params=self.params) suggestions = [q['s'] for q in response.json()] return suggestions
def details(self, app_id): """Sends a GET request and parses an application's details. :param app_id: the app to retrieve details, e.g. 'com.nintendo.zaaa' :return: a dictionary of app details """ url = build_url('details', app_id) try: response = send_request('GET', url, params=self.params) soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8') except requests.exceptions.HTTPError as e: raise ValueError('Invalid application ID: {app}. {error}'.format( app=app_id, error=e)) app_json = parse_app_details(soup) app_json.update({ 'app_id': app_id, 'url': url, }) return app_json
def similar(self, app_id, detailed=False, **kwargs): """Sends a GET request, follows the redirect, and retrieves a list of applications similar to the specified app. :param app_id: app to retrieve details from, e.g. 'com.nintendo.zaaa' :param detailed: if True, sends request per app for its full detail :return: a list of similar apps """ url = build_url('similar', app_id) response = send_request('GET', url, params=self.params, allow_redirects=True) soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8') if detailed: apps = self._parse_multiple_apps(response) else: apps = [parse_card_info(app) for app in soup.select('div[data-uitype="500"]')] return apps
def category_clusters(self, category):
    """Returns the clusters (collections) listed on a category's page,
    keyed by cluster title.

    :param category: the category to fetch, e.g. 'GAME_ACTION'
    :return: a dict mapping each cluster title to its gsr id
    """
    clusters = {}
    cluster_selector = "c-wiz > c-wiz > div > div.Z3lOXb > div.xwY9Zc > a"
    category_url = build_category_url(category=category)
    response = send_request("GET", category_url)
    soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8')
    cluster_elements = soup.select(cluster_selector)
    for element in cluster_elements:
        title = element.h2.text
        gsr = get_query_params(element['href'])['gsr'][0]
        clusters[title] = gsr
    return clusters
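# Usage sketch combining category_clusters() with cluster_items() from earlier
# in this listing (hypothetical PlayScraper wrapper assumed): fetch the
# clusters shown on a category page, then expand each one into app ids.
#
#     scraper = PlayScraper()
#     clusters = scraper.category_clusters('GAME_ACTION')
#     for title, gsr in clusters.items():
#         apps = scraper.cluster_items(gsr=gsr)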
def categories(self, ignore_promotions=True): """Sends a GET request to the front page (app store base url), parses and returns a list of all available categories. """ categories = {} strainer = SoupStrainer('ul', {'class': 'submenu-item-wrapper'}) response = send_request('GET', s.BASE_URL, params=self.params) soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf8', parse_only=strainer) category_links = soup.select('a.child-submenu-link') category_links += soup.select('a.parent-submenu-link') age_query = '?age=' for cat in category_links: url = urljoin(s.BASE_URL, cat.attrs['href']) category_id = url.split('/')[-1] name = cat.string.strip() if age_query in category_id: category_id = 'FAMILY' url = url.split('?')[0] name = 'Family' if category_id not in categories: if ignore_promotions and '/store/apps/category/' not in url: continue categories[category_id] = { 'name': name, 'url': url, 'category_id': category_id} return categories
def similar(self, app_id, detailed=False, **kwargs): """Sends a GET request, follows the redirect, and retrieves a list of applications similar to the specified app. :param app_id: app to retrieve details from, e.g. 'com.nintendo.zaaa' :param detailed: if True, sends request per app for its full detail :return: a list of similar apps """ url = build_url("similar", app_id) response = send_request("GET", url, params=self.params, allow_redirects=True) soup = BeautifulSoup(response.content, "lxml", from_encoding="utf8") if detailed: apps = self._parse_multiple_apps(response) else: apps = [ parse_cluster_card_info(app) for app in soup.select("div.Vpfmgd") ] return apps
def reviews(self, app_id_list, page=0):
    """Sends a POST request per app id and retrieves its reviews, returning
    the combined list of reviews for every app in app_id_list.
    """
    reviews_adder = []
    for n in range(len(app_id_list)):
        data = {
            'reviewType': 0,
            'pageNum': page,
            'id': app_id_list[n],
            'reviewSortOrder': 4,
            'xhr': 1,
            'hl': self.language
        }
        self.params['authuser'] = '******'

        response = send_request('POST', s.REVIEW_URL, data, self.params)
        content = response.text
        content = content[content.find('[["ecr"'):].strip()
        data = json.loads(content)
        html = data[0][2]

        soup = BeautifulSoup(html, 'lxml', from_encoding='utf8')

        reviews = []
        for element in soup.select('.single-review'):
            review = {}
            # Tag each review with the app id it was scraped from.
            review['rev_app_id'] = app_id_list[n]

            avatar_style = element.select_one('.author-image').get('style')
            if avatar_style:
                sheet = cssutils.css.CSSStyleSheet()
                sheet.add('tmp { %s }' % avatar_style)
                review['author_image'] = list(cssutils.getUrls(sheet))[0]

            review_header = element.select_one('.review-header')
            review['review_id'] = review_header.get('data-reviewid', '')
            review['review_permalink'] = review_header.select_one(
                '.reviews-permalink').get('href')
            review['author_name'] = review_header.select_one(
                '.author-name').text
            review['review_date'] = review_header.select_one(
                '.review-date').text

            curr_rating = review_header.select_one('.current-rating').get(
                'style')
            review['current_rating'] = int(
                int(str(cssutils.parseStyle(curr_rating).width).replace(
                    '%', '')) / 20)

            body_elem = element.select_one('.review-body')
            review_title = body_elem.select_one('.review-title').extract()
            body_elem.select_one('.review-link').decompose()
            review['review_title'] = review_title.text
            review['review_body'] = body_elem.text

            reviews.append(review)
            reviews_adder.append(review)

    return reviews_adder
def test_send_normal_request(self):
    method = 'GET'
    response = send_request(method, self.url)
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.url, self.url)
def test_send_normal_request(self): method = "GET" response = send_request(method, self.url) self.assertEqual(response.status_code, 200) self.assertEqual(response.url, self.url)