示例#1
0
def list_of_details_of_collection(category):
    """Scrape every collection for ``category`` and log app details to a file.

    For each name in ``COLLECTIONS`` the listing is requested page by page
    (pages 0-41, 120 results per request). Every listed app is then looked
    up individually and one line per app is written to
    ``data/<category>.txt``: the ``str()`` of a single-entry dict mapping
    the app id to ``[title, developer_id, installs, developer_url,
    developer_email]``.

    Args:
        category: Category name passed to ``play_scraper.collection``; also
            used as the base name of the output file.
    """
    # The context manager guarantees the file is flushed and closed, even
    # when a details lookup raises part-way through the scrape.
    with open('data/' + category + '.txt', 'w') as out_file:
        print('Finding results for ' + category)
        for collection in COLLECTIONS:
            for page in range(0, 42):
                try:
                    listing = play_scraper.collection(collection=collection,
                                                      category=category,
                                                      results=120,
                                                      page=page)
                except Exception:
                    # A failed page request ends paging for this collection
                    # (presumably the collection has run out of pages).
                    break

                app_ids = [item['app_id'] for item in listing]

                for app_id in app_ids:
                    details = play_scraper.details(app_id)
                    record = {
                        details['app_id']: [
                            details['title'], details['developer_id'],
                            details['installs'], details['developer_url'],
                            details['developer_email']
                        ]
                    }
                    try:
                        out_file.write(str(record) + '\n')
                    except (IOError, UnicodeError):
                        # A record that cannot be written abandons the rest
                        # of this page; scraping resumes with the next page.
                        break
    print(category + ' Done\n')
示例#2
0
def index():
    """Render ``index.html`` with the NEW_FREE / GAME collection listing.

    Requests 120 results with ``gl='in'``, echoes every raw item to stdout,
    and hands the template one row per app
    (``[app_id, icon, url, title, developer, score, price]``) together with
    the number of rows.
    """
    fields = ('app_id', 'icon', 'url', 'title', 'developer', 'score',
              'price')
    rows = []
    for app in play_scraper.collection(gl='in',
                                       collection='NEW_FREE',
                                       category='GAME',
                                       results=120):
        print(app)
        rows.append([app[key] for key in fields])
    return render_template('index.html', data=rows, len=len(rows))
示例#3
0
def getIdList(collectionType, numResults=None, category=None):
    """Fetch a collection listing and return it run through ``returnAppIds``.

    Args:
        collectionType: Collection name passed to ``ps.collection``.
        numResults: Number of results to request; ``None`` means 1.
        category: Optional category passed through to ``ps.collection``.

    Returns:
        Whatever ``returnAppIds`` produces for the fetched listing
        (presumably the list of app ids).
    """
    wanted = 1 if numResults is None else numResults
    listing = ps.collection(collection=collectionType,
                            results=wanted,
                            category=category)
    return returnAppIds(listing)


# Usage: print(getIdList('TRENDING', 100))
示例#4
0
 def download_app_details(self, collection, category, pg_num):
     """Fetch one page of detailed app listings, retrying on failure.

     Makes up to ``MAX_TRIES`` attempts. After every failed attempt,
     including the last one, it sleeps for ``300 * 2**attempt`` seconds
     (capped at ``MAX_BACKOFF``) before carrying on.

     Args:
         collection: Collection name passed to ``play_scraper.collection``.
         category: Category name passed to ``play_scraper.collection``.
         pg_num: Page number to request.

     Returns:
         The listing from the first successful attempt, or ``None`` when
         every attempt raised.
     """
     attempt = 0
     while attempt < MAX_TRIES:
         attempt += 1
         print("Will crawl", collection, category, pg_num, attempt)
         try:
             page_apps = play_scraper.collection(
                 collection=collection,
                 category=category,
                 results=APPS_PER_PAGE,
                 page=pg_num,
                 detailed=True)
             # Echoing stays inside the try: an error while printing the
             # result is handled like a failed fetch and retried.
             print(page_apps)
         except Exception as exc:
             print("Error", attempt, collection, category, pg_num, exc)
             # 600 s after the first failure, doubling on each further
             # failure, never more than MAX_BACKOFF.
             backoff = min(300 * 2 ** attempt, MAX_BACKOFF)
             sleep(backoff)
         else:
             return page_apps
     return None
示例#5
0
def scrapeCollectionScreenShots(collectionName):
    """Download the icon of every app in a Play Store collection.

    Despite the name, this saves each app's *icon* (not its screenshots).
    Each icon is written to ``<title>.png`` in the current working
    directory, so apps sharing a title overwrite one another.

    Args:
        collectionName: Collection name passed to ``scraper.collection``.
    """
    for appDict in scraper.collection(collection=collectionName):
        details = scraper.details(appDict['app_id'])
        iconUrl = details['icon']

        # A '/' in the title would be read as a directory separator and
        # make the download fail, so it is replaced in the file name.
        fileName = details['title'].replace('/', '_') + '.png'

        urllib.request.urlretrieve(iconUrl, fileName)
def scrape_and_save(collection, category=None, results=120, pages=5):
    """Scrape a collection's apps and write their details to a CSV file.

    The listing is fetched for pages ``0 .. pages-1``, the app ids are
    de-duplicated, and the details of every app are looked up (with the
    ``developer_address`` entry dropped). The rows are passed through
    ``convert`` and written to ``data/<date>-<collection>-<category>``
    with the first row's keys as the CSV header.

    Args:
        collection: Collection name passed to ``play_scraper.collection``.
        category: Optional category passed to ``play_scraper.collection``.
        results: Number of results requested per page.
        pages: Number of pages to fetch.
    """
    date = datetime.datetime.now().strftime("%Y-%m-%d")
    csv_filename = "data/{}-{}-{}".format(date, collection, category)

    print("creating: {}".format(csv_filename))

    listed_apps = []
    for page in range(pages):
        listed_apps.extend(
            play_scraper.collection(collection=collection,
                                    category=category,
                                    results=results,
                                    page=page))

    # dict.fromkeys removes duplicate ids (insertion order is kept on
    # Python 3.7+).
    app_ids = list(dict.fromkeys(item["app_id"] for item in listed_apps))

    app_details = []
    for identification in app_ids:
        scraped_details = play_scraper.details(identification)
        scraped_details.pop("developer_address", None)
        app_details.append(scraped_details)

    app_details = convert(app_details)

    # Without any rows there is no first row to take the header from;
    # previously this crashed with IndexError on app_details[0].
    if not app_details:
        print("no app details scraped; nothing to write")
        return

    csv_columns = app_details[0].keys()

    try:
        # NOTE(review): binary mode suits a bytes-oriented csv module such
        # as unicodecsv; the stdlib csv module on Python 3 needs text mode.
        # Confirm which `csv` this file imports.
        with open(csv_filename, 'wb') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            for data in app_details:
                writer.writerow(data)
    except IOError:
        print("I/O error")
import json
import unicodecsv as csv

import play_scraper

import pandas as pd

# Collection/category to export and the files the export is written to.
collection_name = 'TRENDING'
category_name = 'ENTERTAINMENT'
path_json = './FolderNameToSaveResponseFiles/ENTERTAINMENT.json'
path_csv = './FolderNameToSaveResponseFiles/ENTERTAINMENT.csv'

# Fetch page 1 (120 detailed results) of the chosen collection/category.
get_data = play_scraper.collection(collection=collection_name,
                                   category=category_name,
                                   results=120,
                                   page=1,
                                   detailed=True)

# Dump the raw response to path_json. Using the same path variable for
# writing and reading keeps both steps on one file on every platform; the
# `with` block closes the file, so no explicit close() is needed.
with open(path_json, "w") as f1:
    json.dump(get_data, f1)

# Re-load the JSON as a DataFrame and save it as CSV.
df = pd.read_json(path_json)
df.to_csv(path_csv, encoding='utf-8')
#!/usr/bin/env python
# coding: utf-8

# In[60]:

import play_scraper

# In[61]:

# Request page 2 of the TRENDING collection (120 results, gl='in').
trending = play_scraper.collection(
    gl='in',
    collection='TRENDING',
    results=120,
    page=2)

# In[62]:

import pandas as pd

# One row per listed app, in the column order used for the CSV below.
trending_data = [
    [
        item['app_id'], item['url'], item['title'], item['developer'],
        item['score'], item['price']
    ]
    for item in trending
]

# NOTE(review): the first column is headed "URL" but is filled from
# item['app_id'] - confirm the header is intended before relying on it.
csv_headers = [
    "URL", "Play_Store_URL", "Game_Name", "Company", "Rating", "Price"
]
df = pd.DataFrame(trending_data, columns=csv_headers)
df.to_csv('trending_data.csv', index=False)
示例#9
0
def playstore():
    """Render ``result.html`` for the collection/category chosen in a form.

    Reads ``col`` and ``cat`` from ``request.form``, requests 120 results
    with ``gl='in'``, and hands the template one row per app
    (``[app_id, icon, url, title, developer, score, price]``) together with
    the number of rows.
    """
    fields = ('app_id', 'icon', 'url', 'title', 'developer', 'score',
              'price')
    listing = play_scraper.collection(gl='in',
                                      collection=request.form['col'],
                                      category=request.form['cat'],
                                      results=120)
    rows = [[app[key] for key in fields] for app in listing]
    return render_template('result.html', data=rows, len=len(rows))
示例#10
0
                            + app_id[12][0] +
                            "', 1, CONVERT_TIMEZONE('JST', SYSDATE) WHERE NOT EXISTS ( SELECT app_id FROM superset_schema.app_ids WHERE app_id = '"
                            + app_id[12][0] + "'); ")
                        if DEBUG:
                            print(str(i) + ": " + app_id[12][0])
                        i = i + 1

# Simple scraping pass: record the app id of every game found in the first
# five pages of the NEW_FREE and TOP_FREE collections.
# TODO: this opens too many connections; move the for loops inside a single
# connection.
count = 1
for COLLECTION_NAME in ['NEW_FREE', 'TOP_FREE']:
    for i in range(5):
        if DEBUG:
            print(COLLECTION_NAME)
        collections = play_scraper.collection(collection=COLLECTION_NAME,
                                              results=120,
                                              page=i)
        for collection in collections:
            detail = play_scraper.details(collection['app_id'])

            # Only apps whose first category mentions "GAME" are stored.
            if "GAME" not in detail['category'][0]:
                continue

            # The statement inserts the id only when it is not already
            # present in superset_schema.app_ids.
            with get_connection() as conn, conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO superset_schema.app_ids(app_id, platform, created_at) SELECT %s, 1, CONVERT_TIMEZONE('JST', SYSDATE) WHERE NOT EXISTS ( SELECT app_id FROM superset_schema.app_ids WHERE app_id = %s); ",
                    (detail['app_id'], detail['app_id']))
                if DEBUG:
                    print(str(count) + ": " + detail['app_id'])
                count += 1

with open(LOG, mode='a') as f:
示例#11
0
# Fetch the categories from get_categories().
categories = get_categories()
# NOTE(review): nothing in the code shown here writes into this dict -
# presumably the per-category totals are stored into it further down; confirm.
downloads_per_country_per_category = {}

for category in tqdm(categories):
    # Install totals for the current category, keyed by country.
    downloads_per_country_by_category = {}

    # Report the category being scraped (tqdm.write is used rather than
    # print, presumably to avoid clobbering the progress bar).
    tqdm.write('Category: %s ' % category)

    for country in countries:
        # For each category and country, fetch the TOP_FREE listing with
        # details and save it under the output/app_info/ folder.
        list_top_n_apps_by_categry = \
            play_scraper.collection(collection='TOP_FREE', gl=country,
                                    category=category,
                                    results=number_of_results,
                                    detailed=True)

        # Write to output/app_info/{country}_{category}_{number_of_results}.json
        with open('output/app_info/%s_%s_%s.json' % (country, category,
                  number_of_results), 'w', encoding='utf-8') as \
            file_pointer:
            json.dump(list_top_n_apps_by_categry, file_pointer,
                      indent=4)

        # Number of installs (in thousands) for each app: ',' and '+' are
        # stripped from the 'installs' string before converting it to int.
        installs = [int(re.sub('[,+]', '', app['installs']))/1000 for app in
                    list_top_n_apps_by_categry]

        # Sum of the installs for this country within the current category.
        downloads_per_country_by_category[country] = sum(installs)
    "MUSIC_AND_AUDIO",
    "NEWS_AND_MAGAZINES",
    "PERSONALIZATION",
    "PHOTOGRAPHY",
    "PRODUCTIVITY",
    "SHOPPING",
    "SPORTS",
    "WEATHER"]

# Collect detailed listings for every category/collection pair: pages 0-3,
# 100 results per request, hl='en'.
apps = []
for cat in CATEGORIES:
    for topic in collections:
        for p in range(4):
            g = play_scraper.collection(collection=topic,
                                        category=cat,
                                        results=100,
                                        page=p,
                                        detailed=True,
                                        hl='en')
            apps.extend(g)
            print(p)
            # Pause 30 s between requests (presumably to stay under rate
            # limits).
            time.sleep(30)

# Split the scraped records into per-field columns.
# NOTE(review): app_price and app_category are not filled by the code shown
# here - confirm whether they are populated elsewhere.
app_price = []
app_score = []
app_name = []
app_description = []
app_category = []
for app in apps:
    app_name.append(app['title'])
    app_description.append(app['description'])
    app_score.append(app['score'])
import play_scraper
from csv import writer

# newline='' lets the csv module control line endings itself; without it
# every row is followed by a blank line on platforms that translate '\n'.
with open('spotify.csv', 'w', encoding="utf-8", newline='') as csv_file:
    csv_writer = writer(csv_file)
    headers = ["Details", "Similar", "Search", "Trending"]
    csv_writer.writerow(headers)

    # Each lookup below becomes one cell of the single data row.
    details = play_scraper.details('com.spotify.music', hl='en', gl='in')
    # Written under the "Similar" header, but this is the result of a
    # developer lookup for 'Spotify Ltd.' (at most 5 results).
    similar = play_scraper.developer('Spotify Ltd.',
                                     results=5,
                                     hl='en',
                                     gl='in')
    search = play_scraper.search('com.spotify.music',
                                 detailed=True,
                                 hl='en',
                                 gl='in')
    collection = play_scraper.collection(collection='TRENDING',
                                         category='MUSIC_AND_AUDIO',
                                         results=10,
                                         hl='en',
                                         gl='in',
                                         page=1)
    csv_writer.writerow([details, similar, search, collection])

#print(play_scraper.details('com.spotify.music'))

#print(play_scraper.collection(collection='TRENDING',category='MUSIC_AND_AUDIO',results=10,hl='en',gl = 'in',page=1))
#print(play_scraper.search('com.spotify.music',detailed=True, hl='en', gl='in'))

#print(play_scraper.developer('Spotify Ltd.', results=5))
示例#14
0
    # Add a sheet for `m` and write the header cells into row 0.
    # NOTE(review): `m`, `wb`, `collect`, `k` and `l` are defined outside the
    # code shown here; `k`/`l` are used as the row/column cursors below and
    # are presumably initialised (k past the header row, l at 0) before this
    # point - confirm.
    ws = wb.add_sheet(m)
    ws.write(0, 0, "App_Id")
    ws.write(0, 1, "URL")
    ws.write(0, 2, "Icon")
    ws.write(0, 3, "Title")
    ws.write(0, 4, "Developer")
    ws.write(0, 5, "Developer_id")
    ws.write(0, 6, "Description")
    ws.write(0, 7, "Score")
    ws.write(0, 8, "Price")
    ws.write(0, 9, "Free")

    # For every collection in `collect`, request 50 results (page 0) for
    # category `m` and append one row per app.
    for u in collect:
        try:
            app = play_scraper.collection(collection=u,
                                          category=m,
                                          results=50,
                                          page=0)
            for i in app:

                # Write the app's values left to right on row k, then move
                # to the next row and reset the column cursor.
                # NOTE(review): cells follow the dict's value order, which is
                # assumed to line up with the header row - confirm.
                for j in i.values():
                    ws.write(k, l, j)
                    l = l + 1
                k = k + 1
                l = 0

        except:
            # NOTE(review): bare except - any error skips to the next
            # collection (and skips the save below for this one). If the
            # error happened mid-row, k and l keep their current values.
            continue
        # Save the workbook after each collection that completed.
        wb.save("PlayStore.xls")

print("finally created")