Exemplo n.º 1
0
def GetLocalGig():
    """Draw a bar chart of the ``local-gig-view`` counts and save it.

    Queries the ``LocalGig`` design document of the ``twitter`` database
    with ``group_level=1``, keeps the first key component of every row
    together with its reduced value, passes that mapping through
    ``state_sorter_twitter`` and plots one bar per resulting entry.

    Returns:
        str: Path of the written image, ``"img/stateGig.png"``.
    """
    variables = load_variables()
    response = couchdb_requests.couch_get_view(variables, "twitter/",
                                               "_design/LocalGig/",
                                               "_view/local-gig-view",
                                               "?reduce=true&group_level=1")
    statemap = {row["key"][0]: row["value"] for row in response["rows"]}

    # state_sorter_twitter is defined elsewhere; presumably it reorders or
    # relabels the entries by state -- confirm against its definition.
    statemap = state_sorter_twitter(statemap)

    statekeys = list(statemap.keys())
    statevalues = list(statemap.values())

    image_path = 'img/stateGig.png'
    fig, ax = plt.subplots()
    try:
        ax.bar(statekeys, statevalues)
        ax.set_ylabel('Count')
        ax.set_title(
            'Num of Gig economy was tweeted in states of Australia from 2010 to 2020'
        )
        ax.set_xticklabels(statekeys)
        # Write each count just above its bar (offset is in data units).
        for a, b in zip(statekeys, statevalues):
            ax.text(a,
                    b + 130,
                    str(b),
                    horizontalalignment='center',
                    verticalalignment='center')
        fig.savefig(image_path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    return "img/stateGig.png"
Exemplo n.º 2
0
def get_business_popularity():
    """Draw a horizontal bar chart of tweet counts per keyword and save it.

    Reads the ``keyword_popular`` view of the ``twitter`` database with
    ``group_level=1`` and plots every row's value against its key.

    Returns:
        str: Path of the written image, ``"img/keyword_pop.png"``.
    """
    variables = load_variables()
    # Extract popular keyword view
    response = couchdb_requests.couch_get_view(variables, "twitter/",
                                               "_design/keyword_count/",
                                               "_view/keyword_popular/",
                                               "?reduce=true&group_level=1")

    # key -> reduced value, in the order the view returned the rows
    keyword_count_dict = {
        row["key"]: row["value"] for row in response["rows"]
    }
    y_pos = list(keyword_count_dict.keys())
    x_pos = list(keyword_count_dict.values())

    # plot the graph
    plt.rcdefaults()
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        y_arrange = np.arange(len(y_pos))
        ax.barh(y_arrange, x_pos, align='center', color="red")
        ax.set_yticks(y_arrange)
        ax.set_yticklabels(y_pos)
        ax.invert_yaxis()  # labels read top-to-bottom
        ax.set_xlabel('Number of Tweets')
        ax.set_title('Popularity of Gig Economy Businesses')

        # save the image
        fig.savefig('img/keyword_pop.png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    return "img/keyword_pop.png"
Exemplo n.º 3
0
def GetUsefulKeywords():
    """Return the tracked keywords that have 2017 rows for a known location.

    Scans the ``sentimentOnStateKeywordYear`` view (``group_level=3``),
    whose row keys are read as ``[location, keyword, year]``. A keyword is
    collected once, in first-seen order, when it is one of the tracked
    keywords, the year is the string ``"2017"`` and the location is one of
    the listed cities or states.

    Returns:
        list: The matching keywords without duplicates.
    """
    tracked_keywords = (
        'airbnb', 'airly', 'airtasker', 'bettercaring', 'camplify',
        'carnextdoor', 'classbento', 'deliveroo', 'designcrowd', 'doordash',
        'ebay', 'etsy', 'fiverr', 'gocatch', 'gumtree', 'helpling', 'homeaway',
        'lyft', 'menulog', 'olacab', 'parkhound', 'pawshake', 'ratesetter',
        'redbubble', 'shebah', 'sidekicker', 'spacer', 'stayz', 'stellar',
        'uber', 'upwork', 'urbansitter', 'zomato', 'zoom2u',
    )
    tracked_locations = (
        'Melbourne', 'Adelaide', 'Brisbane', 'Canberra', 'Darwin', 'Hobart',
        'Perth', 'Sydney', 'Victoria', 'South Australia', 'Queensland',
        'New South Wales', 'Northern Territory', 'Tasmania',
        'Western Australia',
    )

    settings = load_variables()
    view = couchdb_requests.couch_get_view(
        settings, "twitter/", "_design/GigSentimental/",
        "_view/sentimentOnStateKeywordYear", "?reduce=true&group_level=3")

    found = []
    for entry in view["rows"]:
        place = entry["key"][0]
        term = entry["key"][1]
        when = entry["key"][2]
        # only rows for a tracked keyword in 2017 are of interest
        if term not in tracked_keywords or when != "2017":
            continue
        if place not in tracked_locations:
            continue
        # record each keyword only once
        if term in found:
            continue
        found.append(term)
    return found
Exemplo n.º 4
0
def get_income_tweet():
    """Scatter-plot mean income against tweet count and save the image.

    Incomes come from the ``gccsavsincome-view`` view of
    ``aurin-mean-income`` (passed through ``state_sorter_aurin``); tweet
    counts come from the ``locationvskeyword`` view of ``twitter`` at
    ``group_level=1`` (passed through ``state_sorter_twitter``). Points are
    labelled with the keys of the sorted tweet-count mapping.

    Returns:
        str: Path of the written image, ``"img/incomevstweets.png"``.
    """
    variables = load_variables()

    # Get the Aurin Income Data
    response = couchdb_requests.couch_get_view(variables, "aurin-mean-income/",
                                               "_design/city-income/",
                                               "_view/gccsavsincome-view/", "")
    city_income_dict = {row["key"]: row["value"] for row in response["rows"]}
    city_income_dict = state_sorter_aurin(city_income_dict)

    # Get the twitter data for that location
    response = couchdb_requests.couch_get_view(variables, "twitter/",
                                               "_design/location-keyword/",
                                               "_view/locationvskeyword/",
                                               "?reduce=true&group_level=1")
    city_count_dict = {
        row["key"][0]: row["value"] for row in response["rows"]
    }
    city_count_dict = state_sorter_twitter(city_count_dict)

    # NOTE(review): incomes and tweet counts are paired purely by position,
    # so this relies on both sorter helpers yielding the same states in the
    # same order -- confirm against their definitions.
    incomes = list(city_income_dict.values())
    tweet_num = list(city_count_dict.values())
    states = list(city_count_dict.keys())

    # create graph
    fig, ax = plt.subplots()
    try:
        ax.scatter(incomes, tweet_num)
        ax.set_xlabel('Mean Income ($)')
        ax.set_ylabel('Number of Tweets')
        for state, income, tweets in zip(states, incomes, tweet_num):
            # leading space keeps the label off the marker
            ax.annotate(' ' + state, (income, tweets))

        # save the image
        fig.savefig('img/incomevstweets.png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    return "img/incomevstweets.png"
Exemplo n.º 5
0
def Getunemploy():
    """Plot per-state unemployment rates and return them with the image path.

    Reads the ``unemploy_rate`` view of ``aurin-employment-gcc`` at
    ``group_level=3``. For each row the rate is
    ``value / (key[1] + key[2] + value)``; rates (rounded to 4 places) are
    accumulated per state for the five states listed below. Rows for any
    other location are ignored.

    Returns:
        tuple: ``('img/unemployment_rate.png', rates)`` where ``rates`` is a
        ``defaultdict(float)`` keyed by state abbreviation.
    """
    state_abbreviations = {
        "New South Wales": "NSW",
        "Queensland": "QLD",
        "South Australia": "SA",
        "Victoria": "VIC",
        "Western Australia": "WA",
    }

    variables = load_variables()
    response = couchdb_requests.couch_get_view(variables,
                                               "aurin-employment-gcc/",
                                               "_design/states_unemploy/",
                                               "_view/unemploy_rate",
                                               "?reduce=true&group_level=3")

    unemployrate = defaultdict(float)
    for row in response["rows"]:
        state = state_abbreviations.get(row["key"][0])
        if state is None:
            continue
        # presumably key[1] and key[2] are the non-unemployed head counts and
        # value is the unemployed count -- confirm against the view definition
        total = row["key"][1] + row["key"][2] + row["value"]
        rate = row["value"] / total  # calculate the unemployment rate
        unemployrate[state] += round(rate, 4)

    # start plot
    statekeys = list(unemployrate.keys())
    statevalues = list(unemployrate.values())
    fig, ax = plt.subplots()
    try:
        ax.barh(statekeys, statevalues, height=0.5)
        ax.set_xlabel('Unemployment Rate', fontsize=12)
        ax.set_ylabel('States in Australia', fontsize=12)
        ax.set_title('Unemployment Rate in 2018', fontsize=16)
        ax.set_yticklabels(statekeys)
        # Print the percentage just inside the right end of each bar.
        for a, b in zip(statevalues, statekeys):
            ax.text(a - 0.003,
                    b,
                    '{:.2%}'.format(a),
                    horizontalalignment='center',
                    verticalalignment='center')
        fig.savefig('img/unemployment_rate.png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    return 'img/unemployment_rate.png', unemployrate
Exemplo n.º 6
0
def get_business_pop_location():
    """Save one keyword-popularity bar chart per state and return the paths.

    Reads the ``locationvskeyword`` view of ``twitter`` at
    ``group_level=2``; row keys are read as ``[state, keyword]``. Rows whose
    state is ``'OT'`` are skipped. One horizontal bar chart is written for
    each of the eight states listed below, even when a state has no rows.

    Returns:
        list: Image paths of the form ``'img/keyword_loc_<STATE>.png'``, in
        the order of the state list.

    Raises:
        KeyError: If a row names a state that is neither ``'OT'`` nor one of
            the listed states.
    """
    variables = load_variables()

    # Extract popular keyword view
    response = couchdb_requests.couch_get_view(variables, "twitter/",
                                               "_design/location-keyword/",
                                               "_view/locationvskeyword/",
                                               "?reduce=true&group_level=2")

    # Parse the response and store popularity according to each state
    states = ['VIC', 'NSW', 'WA', 'SA', 'NT', 'ACT', 'QLD', 'TAS']
    keyword_loc_dict = {state: {} for state in states}
    for row in response["rows"]:
        if row["key"][0] == 'OT':
            continue
        keyword_loc_dict[row["key"][0]][row["key"][1]] = row["value"]

    images = []
    plt.rcdefaults()
    # create graph for each state
    for state in states:
        y_pos = list(keyword_loc_dict[state].keys())
        x_pos = list(keyword_loc_dict[state].values())
        image_path = 'img/keyword_loc_' + state + '.png'
        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            y_arrange = np.arange(len(y_pos))
            ax.barh(y_arrange, x_pos, align='center')
            ax.set_yticks(y_arrange)
            ax.set_yticklabels(y_pos)
            ax.invert_yaxis()  # labels read top-to-bottom
            ax.set_xlabel('Number of Tweets')
            ax.set_title('Popularity of Gig Economy Businesses in ' + state)
            # save figure for state
            fig.savefig(image_path)
        finally:
            # Eight large figures per call would otherwise stay open.
            plt.close(fig)
        images.append(image_path)
    return images
Exemplo n.º 7
0
def get_unemployment_tweet():
    """Save three yearly line charts and return their paths.

    1. Unemployment per state per year, from the ``sa2_decdata`` view of
       ``aurin-employment-sa2`` (``group_level=1``), with the ``'QLD'``
       series replaced by the hard-coded figures below.
    2. The element-wise total of all state series.
    3. Tweet counts per year, from the ``year_keyword`` view of ``twitter``
       (``group_level=1``).

    All charts use the fixed year axis 2010-2017. Each chart is drawn on its
    own freshly created figure, which is closed after saving, so repeated
    calls do not draw on top of earlier output.

    Returns:
        list: ``['img/unemp_vs_year_state.png', 'img/unemp_vs_year.png',
        'img/tweets_vs_year_state.png']``.

    Raises:
        KeyError: If the unemployment view has no ``'VIC'`` row, or the tweet
            view lacks one of the years on the axis.
    """
    variables = load_variables()
    # get unemployment data from view
    response = couchdb_requests.couch_get_view(variables,
                                               "aurin-employment-sa2/",
                                               "_design/unemployment_doc/",
                                               "_view/sa2_decdata/",
                                               "?reduce=true&group_level=1")
    unemployment_count = {
        row["key"]: row["value"] for row in response["rows"]
    }

    # The QLD series is hard-coded and overrides whatever the view returned.
    # NOTE(review): reason not visible here -- confirm it is still needed.
    unemployment_count['QLD'] = [
        4348, 4386, 4950, 4528, 5129, 5872, 5941, 5664
    ]

    # get tweets based on year
    response = couchdb_requests.couch_get_view(variables, "twitter/",
                                               "_design/location-keyword/",
                                               "_view/year_keyword/",
                                               "?reduce=true&group_level=1")
    year_count = {row["key"][0]: row["value"] for row in response["rows"]}

    x_axis = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']

    # create graph one: unemployment vs year per state
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        for state, series in unemployment_count.items():
            ax.plot(x_axis, series, label=state)
        ax.set_xlabel('year', fontsize=14)
        ax.set_ylabel('no. of people unemployed', fontsize=14)
        ax.set_title('Unemployment vs Year (State)', fontsize=20)
        fig.legend(loc=7)
        fig.savefig('img/unemp_vs_year_state.png')
    finally:
        plt.close(fig)

    # create graph two: unemployment vs year overall
    total = np.zeros(len(unemployment_count['VIC']))
    for series in unemployment_count.values():
        for idx, amount in enumerate(series):
            total[idx] += amount

    fig, ax = plt.subplots(figsize=(8, 8))
    try:
        ax.plot(x_axis, total)
        ax.set_xlabel('year', fontsize=14)
        ax.set_ylabel('no. of people unemployed', fontsize=14)
        ax.set_title('Unemployment vs Year', fontsize=20)
        fig.savefig('img/unemp_vs_year.png')
    finally:
        plt.close(fig)

    # create graph three: gig economy tweets vs year
    tweets_per_year = [year_count[year] for year in x_axis]

    fig, ax = plt.subplots(figsize=(8, 8))
    try:
        ax.plot(x_axis, tweets_per_year, color="red")
        ax.set_xlabel('year', fontsize=16)
        ax.set_ylabel('no. of gig economy tweets', fontsize=16)
        ax.set_title('Gig Economy Tweets vs year', fontsize=20)
        fig.savefig('img/tweets_vs_year_state.png')
    finally:
        plt.close(fig)

    # return image paths
    images = [
        'img/unemp_vs_year_state.png', 'img/unemp_vs_year.png',
        'img/tweets_vs_year_state.png'
    ]
    return images
Exemplo n.º 8
0
def GetKeywordSentiment(keyword):
    """Plot the average 2017 sentiment per state for one keyword.

    Scans the ``sentimentOnStateKeywordYear`` view (``group_level=3``),
    whose row keys are read as ``[location, keyword, year]``. Rows matching
    ``keyword`` and the year string ``"2017"`` are accumulated per state
    (cities are folded into their state). Each state with at least one row
    ends up holding ``sum / row_count * 100`` rounded to 2 places.

    Args:
        keyword: Keyword to select rows by (compared with ``==``).

    Returns:
        tuple: ``(image_path, isHypothesisTrue, isTasNegative, isSANegative,
        lowestSentState, lowestSentValue)`` where

        * ``image_path`` is ``"img/keywordSentiment.png"``;
        * ``isHypothesisTrue`` is True when both TAS and SA are below zero;
        * ``isTasNegative`` / ``isSANegative`` are False only when the
          state's value is strictly positive;
        * ``lowestSentState`` / ``lowestSentValue`` describe the smallest
          non-zero state value, or are both ``None`` when every state is
          zero (for example when the keyword has no 2017 rows).
    """
    # Canberra is counted under NSW here; there is no separate ACT bucket.
    # NOTE(review): confirm that is intended.
    region_to_state = {
        "Melbourne": "VIC",
        "Adelaide": "SA",
        "Brisbane": "QLD",
        "Canberra": "NSW",
        "Darwin": "NT",
        "Hobart": "TAS",
        "Perth": "WA",
        "Sydney": "NSW",
        "Victoria": "VIC",
        "South Australia": "SA",
        "Queensland": "QLD",
        "New South Wales": "NSW",
        "Northern Territory": "NT",
        "Tasmania": "TAS",
        "Western Australia": "WA",
    }

    variables = load_variables()
    response = couchdb_requests.couch_get_view(
        variables, "twitter/", "_design/GigSentimental/",
        "_view/sentimentOnStateKeywordYear", "?reduce=true&group_level=3")
    statemap = initialiseStateMap()
    freq = defaultdict(int)
    for row in response["rows"]:
        location = row["key"][0]
        word = row["key"][1]
        year = row["key"][2]
        # only want data for the particular keyword selected in 2017
        if word != keyword or year != "2017":
            continue
        # unmapped locations are accumulated under their own name
        state = region_to_state.get(location, location)
        statemap[state] += row["value"]
        freq[state] += 1

    # get the avg as a percentage (rounding to 2 decimal places)
    for state, value in list(statemap.items()):
        if freq[state] != 0:
            statemap[state] = round(value / freq[state] * 100, 2)

    # state with the lowest non-zero sentiment value; (None, None) if none
    lowestSentValue, lowestSentState = min(
        ((value, key) for key, value in statemap.items() if value != 0),
        default=(None, None))

    # hypothesis: both Tasmania and South Australia are negative
    isHypothesisTrue = statemap["TAS"] < 0 and statemap["SA"] < 0
    # a state counts as negative unless its value is strictly positive
    isTasNegative = not statemap["TAS"] > 0
    isSANegative = not statemap["SA"] > 0

    statekeys = list(statemap.keys())
    statevalues = list(statemap.values())
    # bar chart plotting
    fig, ax = plt.subplots()
    try:
        # draw a line on y=0
        ax.axhline(y=0, linestyle='-', color='black')
        ax.bar(statekeys, statevalues, color='green')
        ax.set_ylabel('Sentiment (%)')
        ax.set_title('Sentiment Value on Gig Economy Based \n on \"' +
                     keyword + '\" keyword ' + 'for States of Australia',
                     loc='center')
        ax.set_xticklabels(statekeys)
        for a, b in zip(statekeys, statevalues):
            if b == 0:
                continue  # no label for empty bars
            # put the label below negative bars and above positive ones
            offset = -1.7 if b < 0 else 0.4
            ax.text(a, b + offset, str(b), horizontalalignment='center')
        fig.savefig('img/keywordSentiment.png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    return ("img/keywordSentiment.png", isHypothesisTrue, isTasNegative,
            isSANegative, lowestSentState, lowestSentValue)
Exemplo n.º 9
0
def GetElderlyPopPercentage():
    """Plot the average ``elderly_pop_pr100`` value per state and save it.

    Reads the ``population`` view of ``aurin-population``. Every row's
    ``value["elderly_pop_pr100"]`` is accumulated under the state its
    "Greater ..." / "Rest of ..." region belongs to, then divided by the
    number of rows for that state and rounded to 1 decimal place.

    Returns:
        str: Path of the written image, ``"img/elderlyAgePop.png"``.
    """
    region_to_state = {
        "Greater Melbourne": "VIC",
        "Greater Adelaide": "SA",
        "Greater Brisbane": "QLD",
        "Greater Darwin": "NT",
        "Greater Hobart": "TAS",
        "Greater Perth": "WA",
        "Greater Sydney": "NSW",
        "Rest of Vic.": "VIC",
        "Rest of SA": "SA",
        "Rest of Qld": "QLD",
        "Rest of NSW": "NSW",
        "Rest of NT": "NT",
        "Rest of Tas.": "TAS",
        "Rest of WA": "WA",
    }

    variables = load_variables()
    response = couchdb_requests.couch_get_view(variables, "aurin-population/",
                                               "_design/population/",
                                               "_view/population/", "")
    statemap = defaultdict(int)
    freq = defaultdict(int)
    # accumulate the population rows per state
    for row in response["rows"]:
        location = row["key"]
        # unmapped regions are accumulated under their own name
        state = region_to_state.get(location, location)
        statemap[state] += row["value"]["elderly_pop_pr100"]
        freq[state] += 1

    # get the avg (rounded to 1 decimal place)
    for state, value in list(statemap.items()):
        statemap[state] = round(value / freq[state], 1)

    statekeys = list(statemap.keys())
    statevalues = list(statemap.values())
    # bar chart plotting
    fig, ax = plt.subplots()
    try:
        ax.bar(statekeys, statevalues, color='blue')
        ax.set_ylabel('Population (%)')
        ax.set_title('Elderly Population for States of Australia in 2017')
        ax.set_xticklabels(statekeys)
        # Write each value just above its bar (offset is in data units).
        for a, b in zip(statekeys, statevalues):
            ax.text(a,
                    b + 0.5,
                    str(b),
                    horizontalalignment='center',
                    verticalalignment='center')
        fig.savefig('img/elderlyAgePop.png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    return "img/elderlyAgePop.png"
Exemplo n.º 10
0
def GetsentimentD():
    """Plot the average 2018 sentiment per state and return it with the path.

    Reads the ``senti_date`` view of ``twitter`` at ``group_level=2``; row
    keys are read as ``[location, date]`` and the year is the last
    space-separated token of ``date``. For 2018 rows, ``value['sum']`` is
    accumulated per state (cities are folded into their state) and divided
    by the number of contributing rows.

    Returns:
        tuple: ``('img/sentiment2018.png', statemap)`` where ``statemap`` is
        a ``defaultdict`` mapping state to its average.
    """
    # Canberra is counted under NSW here; there is no separate ACT bucket.
    # NOTE(review): confirm that is intended.
    region_to_state = {
        "Melbourne": "VIC",
        "Adelaide": "SA",
        "Brisbane": "QLD",
        "Canberra": "NSW",
        "Darwin": "NT",
        "Hobart": "TAS",
        "Perth": "WA",
        "Sydney": "NSW",
        "Victoria": "VIC",
        "South Australia": "SA",
        "Queensland": "QLD",
        "New South Wales": "NSW",
        "Northern Territory": "NT",
        "Tasmania": "TAS",
        "Western Australia": "WA",
    }

    variables = load_variables()
    response = couchdb_requests.couch_get_view(variables, "twitter/",
                                               "_design/regionVSsentiment/",
                                               "_view/senti_date",
                                               "?reduce=true&group_level=2")
    count = defaultdict(int)
    statemap = defaultdict(int)
    for row in response["rows"]:
        location = row["key"][0]
        date = row["key"][1]
        year = date.split(' ')[-1]

        # keep only the 2018 data
        if year != '2018':
            continue

        # unmapped locations are accumulated under their own name
        state = region_to_state.get(location, location)
        # Every contributing row is counted, so each state's divisor below
        # matches the number of sums added to it.
        count[state] += 1
        statemap[state] += row["value"]['sum']

    for key in list(statemap.keys()):
        statemap[key] = statemap[key] / count[key]

    statekeys = list(statemap.keys())
    statevalues = list(statemap.values())

    # start plot
    fig, ax = plt.subplots()
    try:
        ax.bar(statekeys, statevalues, width=0.5)
        ax.set_ylabel('Sentiment Rate', fontsize=12)
        ax.set_xlabel('States in Australia', fontsize=12)
        ax.set_title('Sentiment to Gig Economy in 2018', fontsize=16)
        ax.set_xticklabels(statekeys)

        # Write each value just above its bar (offset is in data units).
        for a, b in zip(statekeys, statevalues):
            ax.text(a,
                    b + 0.0025,
                    round(b, 3),
                    horizontalalignment='center',
                    verticalalignment='center')
        fig.savefig('img/sentiment2018.png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    return 'img/sentiment2018.png', statemap
Exemplo n.º 11
0
def Getsentiment():
    """Plot the accumulated regional sentiment per state and save the image.

    Reads the ``senti`` view of ``twitter`` at ``group_level=1``. For each
    row the regional mean ``value['sum'] / value['count']`` is added to the
    state the region belongs to (cities are folded into their state).

    Returns:
        str: Path of the written image, ``'img/sentiment.png'``.
    """
    # Canberra is counted under NSW here; there is no separate ACT bucket.
    # NOTE(review): confirm that is intended.
    region_to_state = {
        "Melbourne": "VIC",
        "Adelaide": "SA",
        "Brisbane": "QLD",
        "Canberra": "NSW",
        "Darwin": "NT",
        "Hobart": "TAS",
        "Perth": "WA",
        "Sydney": "NSW",
        "Victoria": "VIC",
        "South Australia": "SA",
        "Queensland": "QLD",
        "New South Wales": "NSW",
        "Northern Territory": "NT",
        "Tasmania": "TAS",
        "Western Australia": "WA",
    }

    variables = load_variables()
    response = couchdb_requests.couch_get_view(variables, "twitter/",
                                               "_design/regionVSsentiment/",
                                               "_view/senti",
                                               "?reduce=true&group_level=1")
    statemap = defaultdict(int)
    for row in response["rows"]:
        location = row["key"][0]
        # calculate the sentiment value for each region
        value = row["value"]['sum'] / row["value"]['count']

        # group value by state; unmapped locations keep their own name
        # NOTE(review): regional means are summed, not averaged, so a state
        # fed by several regions ends up with the sum of their means --
        # confirm this is the intended metric.
        statemap[region_to_state.get(location, location)] += value

    statekeys = list(statemap.keys())
    statevalues = list(statemap.values())

    fig, ax = plt.subplots()
    try:
        ax.bar(statekeys, statevalues, width=0.5)
        ax.set_ylabel('Sentiment Rate')
        ax.set_title('Sentiment About Gig Economy')
        ax.set_xticklabels(statekeys)

        # Write each value just above its bar (offset is in data units).
        for a, b in zip(statekeys, statevalues):
            ax.text(a,
                    b + 0.01,
                    round(b, 3),
                    horizontalalignment='center',
                    verticalalignment='center')
        fig.savefig('img/sentiment.png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    return 'img/sentiment.png'