def usersList():
    """
    Collect unique users from the Untappd /thepub feed.

    Polls untappd.getPubFeed() in an endless loop. For every check-in whose
    user id is not stored yet and whose location is not empty, a
    UT.UntappdUser (uid, username, utf-8 encoded location name, empty
    ratings) is added. After each poll the complete collection is written
    to ../data/users.json and the running totals are printed.

    Users saved by earlier runs are loaded through files.readUsers(), so the
    script may be restarted to keep extending the same file. It never
    returns; it runs until the process is stopped.
    """
    knownUsers = files.readUsers()
    apiCount = 0
    while True:
        # one call returns the most recent check-ins (25 per the API notes)
        data = untappd.getPubFeed()
        apiCount += 1
        print('apiCount: ' + str(apiCount))
        for checkin in data['response']['checkins']['items']:
            userInfo = checkin['user']
            userId = userInfo['uid']
            username = userInfo['user_name']
            userLocation = userInfo['location']
            userKey = hash(str(userId))
            # skip users already stored and users without a location
            if userKey in knownUsers or userLocation == '':
                continue
            userAttribs = {
                'uid': str(userId),
                'username': username,
                'location': {
                    'name': unicode(userLocation).encode("utf-8")
                },
                'ratings': {}
            }
            knownUsers[userKey] = UT.UntappdUser(userAttribs)
        # persist after every poll so stopping the script loses no data
        writeJSONFile('../data/users.json', knownUsers)
        print('Total Users: ' + str(len(knownUsers)))
        # Untappd only allows 100 api requests per hour (one every 36
        # seconds); sleeping 37 seconds between requests stays below that.
        sleep(37)
示例#2
0
def createDataPoints():
    """
    Build one map data point per stored user rating and save them as JSON.

    Only users whose location contains a 'lat' entry and who have at least
    one rating contribute. Each point combines the user's coordinates and
    country (None when the location has no 'country') with the rating and
    the abv/style of the rated beer. The result is wrapped in dp.dataPoints
    and written to ../data/dataPoints.json.
    """
    users = files.readUsers()
    beers = files.readBeers()
    collected = []
    added = 0
    for user in users.itervalues():
        place = user.location
        if 'lat' not in place or not user.ratings:
            continue
        country = place['country'] if 'country' in place else None
        for bid, rating in user.ratings.iteritems():
            # beers are looked up by the string form of the hashed beer id
            beerKey = str(hash(bid))
            attribs = {
                'lat': place['lat'],
                'lng': place['lng'],
                'country': country,
                'abv': beers[beerKey].abv,
                'rating': rating,
                'style': beers[beerKey].style,
            }
            collected.append(dp.dataPoint(attribs))
            added += 1
            # progress report every 1000 points
            if added % 1000 == 0:
                print("Points added: " + str(added))
    writeJSONFile('../data/dataPoints.json', dp.dataPoints(collected))
示例#3
0
def normalizeUsers():
    """
    Replace user ids with sequential numbers and geocode user locations.

    Every user loaded through files.readUsers() gets a new uid ("1", "2",
    ...) in iteration order. A user with a non-empty location name and no
    'lat' entry is looked up with PBAMap.getLatLong(); a successful lookup
    replaces the location with name/lat/lng (plus country when returned), an
    empty result resets the location to an empty name, and an 'apiLimit'
    result only prints a warning. The users are written to
    ../data/users.json keyed by the hash of their previous uid.
    """
    renumbered = {}
    lookupCount = 1
    nextUid = 1
    for user in files.readUsers().itervalues():
        previousUid = user.uid
        user.uid = str(nextUid)
        nextUid += 1

        place = user.location
        if place['name'] != "" and 'lat' not in place:
            placeName = place['name']
            if isinstance(placeName, unicode):
                placeName = placeName.encode('utf-8')

            mapInfo = PBAMap.getLatLong(placeName, lookupCount)
            # the counter is advanced before it is printed below
            lookupCount += 1
            if mapInfo == 'apiLimit':
                print(str(lookupCount) +
                      " At daily API limit. Update script and repeat tomorrow")
            elif mapInfo == '':
                # nothing found: drop the location name that cannot be resolved
                print(str(lookupCount) + " " + "checked: none")
                user.location = {'name': ''}
            else:
                geocoded = {
                    'name': placeName,
                    'lat': mapInfo['lat'],
                    'lng': mapInfo['lng'],
                }
                if 'country' in mapInfo:
                    geocoded['country'] = mapInfo['country']
                user.location = geocoded
                print(str(lookupCount) + " " + str(user.location))
        renumbered[hash(str(previousUid))] = user

    writeJSONFile('../data/users.json', renumbered)
    print("User ids, usernames, and locations updated\n")
示例#4
0
def usersList():
    """
    Collect unique users from the Untappd /thepub feed.

    Polls untappd.getPubFeed() in an endless loop. For every check-in whose
    user id is not stored yet and whose location is not empty, a
    UT.UntappdUser (uid, username, utf-8 encoded location name, empty
    ratings) is added. After each poll the complete collection is written
    to ../data/users.json and the running totals are printed.

    Users saved by earlier runs are loaded through files.readUsers(), so the
    script may be restarted to keep extending the same file. It never
    returns; it runs until the process is stopped.
    """
    knownUsers = files.readUsers()
    apiCount = 0
    while True:
        # one call returns the most recent check-ins (25 per the API notes)
        data = untappd.getPubFeed()
        apiCount += 1
        print('apiCount: ' + str(apiCount))
        for checkin in data['response']['checkins']['items']:
            userInfo = checkin['user']
            userId = userInfo['uid']
            username = userInfo['user_name']
            userLocation = userInfo['location']
            userKey = hash(str(userId))
            # skip users already stored and users without a location
            if userKey in knownUsers or userLocation == '':
                continue
            userAttribs = {
                'uid': str(userId),
                'username': username,
                'location': {
                    'name': unicode(userLocation).encode("utf-8")
                },
                'ratings': {}
            }
            knownUsers[userKey] = UT.UntappdUser(userAttribs)
        # persist after every poll so stopping the script loses no data
        writeJSONFile('../data/users.json', knownUsers)
        print('Total Users: ' + str(len(knownUsers)))
        # Untappd only allows 100 api requests per hour (one every 36
        # seconds); sleeping 37 seconds between requests stays below that.
        sleep(37)
def normalizeUsers():
    """
    Replace user ids with sequential numbers and geocode user locations.

    Every user loaded through files.readUsers() gets a new uid ("1", "2",
    ...) in iteration order. A user with a non-empty location name and no
    'lat' entry is looked up with PBAMap.getLatLong(); a successful lookup
    replaces the location with name/lat/lng (plus country when returned), an
    empty result resets the location to an empty name, and an 'apiLimit'
    result only prints a warning. The users are written to
    ../data/users.json keyed by the hash of their previous uid.
    """
    renumbered = {}
    lookupCount = 1
    nextUid = 1
    for user in files.readUsers().itervalues():
        previousUid = user.uid
        user.uid = str(nextUid)
        nextUid += 1

        place = user.location
        if place['name'] != "" and 'lat' not in place:
            placeName = place['name']
            if isinstance(placeName, unicode):
                placeName = placeName.encode('utf-8')

            mapInfo = PBAMap.getLatLong(placeName, lookupCount)
            # the counter is advanced before it is printed below
            lookupCount += 1
            if mapInfo == 'apiLimit':
                print(str(lookupCount) +
                      " At daily API limit. Update script and repeat tomorrow")
            elif mapInfo == '':
                # nothing found: drop the location name that cannot be resolved
                print(str(lookupCount) + " " + "checked: none")
                user.location = {'name': ''}
            else:
                geocoded = {
                    'name': placeName,
                    'lat': mapInfo['lat'],
                    'lng': mapInfo['lng'],
                }
                if 'country' in mapInfo:
                    geocoded['country'] = mapInfo['country']
                user.location = geocoded
                print(str(lookupCount) + " " + str(user.location))
        renumbered[hash(str(previousUid))] = user

    writeJSONFile('../data/users.json', renumbered)
    print("User ids, usernames, and locations updated\n")
def createDataPoints():
    """
    Build one map data point per stored user rating and save them as JSON.

    Only users whose location contains a 'lat' entry and who have at least
    one rating contribute. Each point combines the user's coordinates and
    country (None when the location has no 'country') with the rating and
    the abv/style of the rated beer. The result is wrapped in dp.dataPoints
    and written to ../data/dataPoints.json.
    """
    users = files.readUsers()
    beers = files.readBeers()
    collected = []
    added = 0
    for user in users.itervalues():
        place = user.location
        if 'lat' not in place or not user.ratings:
            continue
        country = place['country'] if 'country' in place else None
        for bid, rating in user.ratings.iteritems():
            # beers are looked up by the string form of the hashed beer id
            beerKey = str(hash(bid))
            attribs = {
                'lat': place['lat'],
                'lng': place['lng'],
                'country': country,
                'abv': beers[beerKey].abv,
                'rating': rating,
                'style': beers[beerKey].style,
            }
            collected.append(dp.dataPoint(attribs))
            added += 1
            # progress report every 1000 points
            if added % 1000 == 0:
                print("Points added: " + str(added))
    writeJSONFile('../data/dataPoints.json', dp.dataPoints(collected))
"""Single-purpose script for easy monitoring of data quantity.

Load each json data file, find its size and generate
a plot for presentation.
"""

import fileReader as files
import matplotlib.pyplot as plt
import os
import numpy as np

# Load the stored collections through the fileReader helpers, announcing
# each step on stdout
print "Loading beers..."
beersList = files.readBeers()
print "Loading users..."
usersList = files.readUsers()
print "Loading breweries..."
breweriesList = files.readBreweries()

# Directory whose entries are counted for the 'Labels' quantity below
path = "../data/labels/"
fileList = os.listdir(path)

# Data gathering: one quantity per label, in the same order as `labels`.
# 'Reviews' is the total number of ratings summed over all users.
labels = ('Beers', 'Reviews', 'Users', 'Breweries', 'Labels')
index = np.arange(len(labels))
quantities = (len(beersList), sum([len(x.ratings) for x in usersList.values()]),
               len(usersList),  len(breweriesList), len(fileList))

# Plot the quantities
plt.figure(1)
示例#8
0
def userReviews():
    """
    Fetch beer reviews for every stored user that still has a username.

    For each such user at most two calls to untappd.getUserReviewData() are
    made (the second one offset by the number of reviews already returned).
    Reviews with a positive rating add or update the beer, add the brewery
    when it is new, record the beer under its brewery and store the rating
    on the user. The username is cleared to lessen privacy concerns, which
    also makes later runs skip the user. After each user all four
    collections are written back to ../data/*.json and the function sleeps
    37 seconds per API call made.
    """
    users = files.readUsers()
    beers = files.readBeers()
    breweries = files.readBreweries()
    breweryBeers = files.readBreweryToBeers()

    reviewTotal = 0
    usersSeen = 0
    for user in users.itervalues():
        usersSeen += 1
        # Users without a username were processed (or normalized) earlier;
        # only count the ratings they already have.
        if not user.username:
            reviewTotal += len(user.ratings)
            continue

        userId = user.uid
        username = user.username
        user.username = None
        print('Processing ' + str(userId) + ': ' + username)

        ratings = {}
        fetched = 0
        callsMade = 0
        # Each response returns at most 25 reviews, so two calls retrieve at
        # most 50 reviews from the same user.
        for pageNumber in (1, 2):
            print(username + ': ' + str(pageNumber))
            data = untappd.getUserReviewData(username, fetched)
            pageSize = data['response']['beers']['count']
            fetched += pageSize
            for review in data['response']['beers']['items']:
                userRating = review['rating_score']
                if not userRating > 0:
                    continue
                beerInfo = review['beer']
                breweryInfo = review['brewery']

                # beer: count another rating or create the entry
                bid = str(beerInfo['bid'])
                beerKey = hash(bid)
                if beerKey in beers:
                    beers[beerKey].numRatings += 1
                else:
                    styleText = unicode(
                        beerInfo['beer_style']).encode("utf-8")
                    # "A / B" style strings become ['A', 'B']
                    stylesList = [
                        part.strip()
                        for part in styleText.lower().title().split('/')
                    ]
                    beer = UT.UntappdBeer({
                        'bid': bid,
                        'name':
                        unicode(beerInfo['beer_name']).encode("utf-8"),
                        'label': beerInfo['beer_label'],
                        'abv': beerInfo['beer_abv'],
                        'ibu': beerInfo['beer_ibu'],
                        'style': stylesList,
                        'description':
                        unicode(beerInfo['beer_description']).encode(
                            "utf-8"),
                        'rating': beerInfo['rating_score'],
                        'numRatings': 1,
                        'brewery': str(breweryInfo['brewery_id'])
                    })
                    beers[hash(beer.bid)] = beer

                # brewery: create the entry when it is not known yet
                breweryId = str(breweryInfo['brewery_id'])
                breweryKey = hash(breweryId)
                if breweryKey not in breweries:
                    brewery = UT.UntappdBrewery({
                        'breweryId': breweryId,
                        'name':
                        unicode(breweryInfo['brewery_name']).encode(
                            "utf-8"),
                        'label': breweryInfo['brewery_label'],
                        'country':
                        unicode(breweryInfo['country_name']).encode(
                            "utf-8"),
                        'location':
                        unicode(breweryInfo['location']).encode("utf-8")
                    })
                    breweries[hash(brewery.breweryId)] = brewery

                # map brewery id to the list of beers reviewed from it
                # NOTE(review): a beer id is appended on every review, so
                # the list may contain repeats -- confirm this is intended
                if breweryKey in breweryBeers:
                    breweryBeers[breweryKey][breweryId].append(bid)
                else:
                    breweryBeers[breweryKey] = {breweryId: [bid]}

                # remember this user's rating of the beer
                ratings[bid] = userRating
            callsMade = pageNumber
            user.ratings = ratings

            # fewer than 25 results means there are no more reviews
            if pageSize < 25:
                break

        # store everything after each user so that stopping the script
        # does not lose the data gathered so far
        writeJSONFile('../data/users.json', users)
        writeJSONFile('../data/beers.json', beers)
        writeJSONFile('../data/breweries.json', breweries)
        writeJSONFile('../data/breweryToBeers.json', breweryBeers)

        reviewTotal += len(ratings)
        print(str(userId) + ': ' + username + ', Processed: ' +
              str(len(ratings)) + ' reviews')
        print('Total Reviews: ' + str(reviewTotal))
        print('Total Users Completed: ' + str(usersSeen))
        # pause 37 seconds for every API call made for this user
        sleep(37 * callsMade)
def userReviews():
    """
    Fetch beer reviews for every stored user that still has a username.

    For each such user at most two calls to untappd.getUserReviewData() are
    made (the second one offset by the number of reviews already returned).
    Reviews with a positive rating add or update the beer, add the brewery
    when it is new, record the beer under its brewery and store the rating
    on the user. The username is cleared to lessen privacy concerns, which
    also makes later runs skip the user. After each user all four
    collections are written back to ../data/*.json and the function sleeps
    37 seconds per API call made.
    """
    users = files.readUsers()
    beers = files.readBeers()
    breweries = files.readBreweries()
    breweryBeers = files.readBreweryToBeers()

    reviewTotal = 0
    usersSeen = 0
    for user in users.itervalues():
        usersSeen += 1
        # Users without a username were processed (or normalized) earlier;
        # only count the ratings they already have.
        if not user.username:
            reviewTotal += len(user.ratings)
            continue

        userId = user.uid
        username = user.username
        user.username = None
        print('Processing ' + str(userId) + ': ' + username)

        ratings = {}
        fetched = 0
        callsMade = 0
        # Each response returns at most 25 reviews, so two calls retrieve at
        # most 50 reviews from the same user.
        for pageNumber in (1, 2):
            print(username + ': ' + str(pageNumber))
            data = untappd.getUserReviewData(username, fetched)
            pageSize = data['response']['beers']['count']
            fetched += pageSize
            for review in data['response']['beers']['items']:
                userRating = review['rating_score']
                if not userRating > 0:
                    continue
                beerInfo = review['beer']
                breweryInfo = review['brewery']

                # beer: count another rating or create the entry
                bid = str(beerInfo['bid'])
                beerKey = hash(bid)
                if beerKey in beers:
                    beers[beerKey].numRatings += 1
                else:
                    styleText = unicode(
                        beerInfo['beer_style']).encode("utf-8")
                    # "A / B" style strings become ['A', 'B']
                    stylesList = [
                        part.strip()
                        for part in styleText.lower().title().split('/')
                    ]
                    beer = UT.UntappdBeer({
                        'bid': bid,
                        'name':
                        unicode(beerInfo['beer_name']).encode("utf-8"),
                        'label': beerInfo['beer_label'],
                        'abv': beerInfo['beer_abv'],
                        'ibu': beerInfo['beer_ibu'],
                        'style': stylesList,
                        'description':
                        unicode(beerInfo['beer_description']).encode(
                            "utf-8"),
                        'rating': beerInfo['rating_score'],
                        'numRatings': 1,
                        'brewery': str(breweryInfo['brewery_id'])
                    })
                    beers[hash(beer.bid)] = beer

                # brewery: create the entry when it is not known yet
                breweryId = str(breweryInfo['brewery_id'])
                breweryKey = hash(breweryId)
                if breweryKey not in breweries:
                    brewery = UT.UntappdBrewery({
                        'breweryId': breweryId,
                        'name':
                        unicode(breweryInfo['brewery_name']).encode(
                            "utf-8"),
                        'label': breweryInfo['brewery_label'],
                        'country':
                        unicode(breweryInfo['country_name']).encode(
                            "utf-8"),
                        'location':
                        unicode(breweryInfo['location']).encode("utf-8")
                    })
                    breweries[hash(brewery.breweryId)] = brewery

                # map brewery id to the list of beers reviewed from it
                # NOTE(review): a beer id is appended on every review, so
                # the list may contain repeats -- confirm this is intended
                if breweryKey in breweryBeers:
                    breweryBeers[breweryKey][breweryId].append(bid)
                else:
                    breweryBeers[breweryKey] = {breweryId: [bid]}

                # remember this user's rating of the beer
                ratings[bid] = userRating
            callsMade = pageNumber
            user.ratings = ratings

            # fewer than 25 results means there are no more reviews
            if pageSize < 25:
                break

        # store everything after each user so that stopping the script
        # does not lose the data gathered so far
        writeJSONFile('../data/users.json', users)
        writeJSONFile('../data/beers.json', beers)
        writeJSONFile('../data/breweries.json', breweries)
        writeJSONFile('../data/breweryToBeers.json', breweryBeers)

        reviewTotal += len(ratings)
        print(str(userId) + ': ' + username + ', Processed: ' +
              str(len(ratings)) + ' reviews')
        print('Total Reviews: ' + str(reviewTotal))
        print('Total Users Completed: ' + str(usersSeen))
        # pause 37 seconds for every API call made for this user
        sleep(37 * callsMade)