def get_app_store(app_id, n_reviews=200, filter="all"):
    """Fetch App Store reviews (country ``'us'``) for an app as a DataFrame.

    Args:
        app_id: Forwarded to ``AppStore`` as ``app_name``.
        n_reviews: Forwarded to ``AppStore.review`` as ``how_many``.
        filter: ``"all"`` returns every scraped review. Any other value is
            appended to the string ``"rating"`` and evaluated with
            ``DataFrame.query`` (for example ``">= 4"`` or ``"== 1"``).
            The name shadows the builtin ``filter``; it is kept so existing
            keyword callers continue to work.

    Returns:
        A ``pandas.DataFrame`` built from the scraped reviews, optionally
        restricted by the rating expression. If nothing was scraped the
        empty frame is returned regardless of ``filter``.
    """
    store = AppStore(app_name=app_id, country='us')
    store.review(how_many=n_reviews)
    reviews_df = pd.DataFrame(store.reviews)

    # An empty frame has no "rating" column, so query() would raise on it;
    # there is nothing to filter in that case anyway.
    if filter == "all" or reviews_df.empty:
        return reviews_df
    return reviews_df.query('rating' + filter)
def scrape_data():
    """Prompt for an app name, scrape its reviews, and dump them to JSON.

    The app name is read interactively from stdin. Reviews are requested
    from ``AppStore`` with ``country="in"`` and ``how_many=3000``, each one
    is passed through ``normalize_review``, and the resulting list is
    written to ``../app-store-reviews.json`` (relative to the current
    working directory), overwriting any existing file.
    """
    app_info = AppStore(
        app_name=input("Enter name of the app in the App Store: "),
        country="in")
    app_info.review(how_many=3000)

    reviews = [normalize_review(review) for review in app_info.reviews]

    # Serialise before opening so a serialisation error does not leave a
    # truncated file behind; the context manager closes the handle even if
    # the write itself fails.
    payload = json.dumps(reviews)
    with open("../app-store-reviews.json", "w") as f:
        f.write(payload)
def Crawl_appsList(l):
    """Scrape every app named in ``l`` and collect the ``Crawl_app`` results.

    Args:
        l: Iterable of app names; each is passed to ``AppStore`` as
            ``app_name`` with ``country="eg"``.

    Returns:
        A list holding the value returned by ``Crawl_app`` for every app
        that was processed successfully, in input order. Apps that raise
        during scraping or crawling are reported on stdout and skipped.
    """
    dataTable = []
    for app in l:
        try:
            application = AppStore(country="eg", app_name=app)
            application.review()
            dataTable.append(Crawl_app(application, application.reviews))
        except Exception:
            # Best-effort: one bad app must not abort the whole batch.
            # ``Exception`` (rather than a bare except) lets Ctrl-C and
            # SystemExit propagate instead of being reported as a bad name.
            print("Couldn't found App: ", app, "\nCheck the application name again.")
    return dataTable
import csv

from app_store_scraper import AppStore

# Change this to the name of the csv file containing your app metadata
# requires "url_name" and "id" fields
IN_FILE = 'as_eating_disorder_apps.csv'

# read app name and id from csv file
with open(IN_FILE, newline='', encoding='utf-8') as file:
    apps = [
        {'app_name': row['url_name'], 'app_id': row['id']}
        for row in csv.DictReader(file)
    ]

# for each app, use the AppStore scraper to fetch all reviews
for app in apps:
    app_temp = AppStore(country='gb', app_name=app['app_name'], app_id=app['app_id'])
    app_temp.review()
    app['reviews'] = app_temp.reviews  # save list of reviews to existing dictionary

# save reviews to separate csv files for each app
for app in apps:
    if app.get('reviews'):  # Not all apps have reviews
        outfile = app['app_name'] + '_reviews.csv'  # use the app_name to name the output file

        # Only some reviews carry 'developerResponse', so the header cannot be
        # taken from the first review alone. Build the ordered union of keys
        # over all reviews (DictWriter raises ValueError on a key missing from
        # the header) and place 'developerResponse' exactly once, last, so the
        # column is never duplicated when the first review already has it.
        seen_keys = dict.fromkeys(
            key for review in app['reviews'] for key in review
        )
        fieldnames = [key for key in seen_keys if key != 'developerResponse']
        fieldnames.append('developerResponse')

        with open(outfile, 'w', encoding='utf-8', newline='') as file:
            dict_writer = csv.DictWriter(file, fieldnames)
            dict_writer.writeheader()
            dict_writer.writerows(app['reviews'])
class AppReview(TopicModeller):
    """Scrape App Store reviews for one app and feed them to ``TopicModeller``.

    NOTE(review): the parent initialiser is deliberately not called from
    ``__init__``; it is invoked later by ``generate_embeddings`` once the
    review text is available. ``get_reviews`` must therefore run before
    ``generate_embeddings``, and ``cluster_embeddings`` before
    ``generate_topic_csv`` (which reads ``self.num_topics``).
    """

    def __init__(self, country, app_name):
        """Store the target app and create its ``AppStore`` scraper."""
        self.country = country
        self.app_name = app_name
        self.review_unit = AppStore(country=country, app_name=app_name)
        self.topic_reviews = pd.DataFrame()

    def get_reviews(self, num_reviews=None):
        """Scrape reviews and return them as a pandas DataFrame.

        Args:
            num_reviews: When truthy, forwarded to the scraper as
                ``how_many``; otherwise the scraper is called with its
                defaults.

        Returns:
            DataFrame with columns ``title``, ``rating``, ``userName``,
            ``review``, ``date`` (formatted ``%m/%d/%Y, %H:%M:%S``) and
            ``isEdited``. Also stored on ``self.review_df``.
        """
        scraper = self.review_unit
        if num_reviews:
            scraper.review(how_many=num_reviews)
        else:
            scraper.review()

        column_names = ('title', 'rating', 'userName', 'review', 'date', 'isEdited')
        # One pass over the scraped reviews, reading the fields in column order.
        rows = [
            (
                entry['title'],
                entry['rating'],
                entry['userName'],
                entry['review'],
                entry['date'].strftime("%m/%d/%Y, %H:%M:%S"),
                entry['isEdited'],
            )
            for entry in scraper.reviews
        ]
        # Transpose the rows into the column -> list-of-values mapping.
        review_dict = {
            name: [row[position] for row in rows]
            for position, name in enumerate(column_names)
        }

        self.review_df = pd.DataFrame(data=review_dict)
        return self.review_df

    def generate_embeddings(self):
        """Initialise the ``TopicModeller`` parent from the scraped review text.

        Builds a one-column ``sentence`` DataFrame from
        ``self.review_df.review`` and passes it to the parent initialiser.
        """
        sentences = list(self.review_df.review)
        sentence_frame = pd.DataFrame(data={'sentence': sentences})
        super().__init__(sentence_frame)

    def cluster_embeddings(self, num_topics=None):
        """Record ``num_topics`` and store the parent's ``project`` result."""
        self.num_topics = num_topics
        self.topic_reviews = super().project(topics=num_topics)

    def plot_embeddings(self, port_num=9000):
        """Delegate to the parent's ``plot`` with this app's name and port."""
        super().plot(self.app_name, port_num=port_num)

    def generate_topic_csv(self, csv_path):
        """Hand the clustered reviews to ``generate_csv`` for ``csv_path``."""
        generate_csv(self.topic_reviews, csv_path, self.num_topics)
def get_reviews(app_name, country="in", how_many=50000):
    """Scrape and return App Store reviews for ``app_name``.

    Args:
        app_name: Forwarded to ``AppStore`` as ``app_name``.
        country: Forwarded to ``AppStore`` as ``country``. Defaults to
            ``"in"``, the value that was previously hard-coded.
        how_many: Forwarded to ``AppStore.review`` as ``how_many``.
            Defaults to ``50000``, the value that was previously hard-coded.

    Returns:
        The scraper's ``reviews`` attribute after the fetch.
    """
    app_search = AppStore(country=country, app_name=app_name)
    app_search.review(how_many=how_many)
    return app_search.reviews
import json

from app_store_scraper import AppStore

# Scrape the reviews of the "arrivecan" app with country "ca".
arrivecan = AppStore(country="ca", app_name="arrivecan")
arrivecan.review()

# Replace each review's date object with its formatted string, in place,
# so the review list can be serialised as JSON below.
for review in arrivecan.reviews:
    posted_at = review['date']
    review['date'] = posted_at.strftime("%m/%d/%Y, %H:%M:%S")

# Write the reviews as indented, non-ASCII-escaped JSON.
serialised = json.dumps(arrivecan.reviews, ensure_ascii=False, indent=4)
with open('apple.json', 'w', encoding='utf-8') as f:
    f.write(serialised)
# config['app_id']['uber_apple'], config['app_id']['blablacar_apple'], # config['app_id']['cabify_apple'], # config['app_id']['via_apple'], # config['app_id']['getaround_apple'], # config['app_id']['olacabs_apple'], # config['app_id']['taxieu_apple'], # config['app_id']['freenow_apple'], # config['app_id']['yandexgo_apple'] ] list_of_country_code = config['country_code'] output_path = config['output_path'] for app_id in list_of_app_id: df_merged = None for country_code in list_of_country_code: appstore = AppStore(country=country_code, app_name=app_id) appstore.review(how_many=100000) if df_merged is not None: df = pd.json_normalize(appstore.reviews) df_merged = df_merged.append(df) else: df_merged = pd.json_normalize(appstore.reviews) csv_file_name = app_id + '_apple_appstore_review.csv' df_merged.to_csv(output_path + csv_file_name)