Пример #1
0
    def plot_data(self, pdf_file, image_filename):
        """Plot the place-type distribution of tweets as a pie chart.

        Rows come from ``Utils.get_data_vectors(self._datafile, ' ')``;
        column 0 is used as the place type and column 1 as its integer
        reference count.  The ``'NotAvailable'`` row, when present, is left
        out of the pie and only reported as a percentage in the title.

        pdf_file -- destination passed to ``savefig`` with ``format='pdf'``.
        image_filename -- destination passed to ``savefig`` as-is.

        Raises ZeroDivisionError when the counts sum to zero, either before
        or after the ``'NotAvailable'`` row has been removed.
        """
        place_types = []
        reference_counts = []
        for data_row in Utils.get_data_vectors(self._datafile, ' '):
            place_types.append(data_row[0])
            reference_counts.append(int(data_row[1]))
        figure(1, figsize = (6, 6))
        axes([0.1, 0.1, 0.8, 0.8])

        total_references = sum(reference_counts)

        #  Take the NA row out of the plotted data.  A data file without
        #  such a row is treated as having zero unavailable place types
        #  instead of aborting the whole plot.
        try:
            index = place_types.index('NotAvailable')
        except ValueError:
            NA_count = 0
        else:
            place_types.pop(index)
            NA_count = reference_counts.pop(index)
        #  The NA share is relative to ALL references, so it is computed
        #  before the NA count is subtracted from the total.
        NA_percentage = 100 * NA_count / float(total_references)
        total_references -= NA_count

        fracs = [count / float(total_references) for count in reference_counts]
        #  One zero offset per slice, so the chart works for any number of
        #  place types rather than exactly five.
        explode = [0] * len(fracs)
        mpl.rcParams['font.size'] = 9.0

        #  place_types no longer contains the NA entry, so labels and
        #  fractions stay aligned.
        pie(fracs, explode = explode, labels = place_types, autopct = '%1.1f%%', shadow = False, startangle = 0)

        title('Place Type Distribution: ' + str(NA_percentage) +
              '% tweets dont have a place type, showing the remaining tweets',
              bbox = {'facecolor':'0.8', 'pad':5})
        savefig(pdf_file, format = 'pdf')
        savefig(image_filename)
        #  Clear the current figure after saving.
        clf()
Пример #2
0
    def plot_data(self, pdf_file, image_filename):
        """Plot the share of tweets per category as a pie chart.

        Rows come from ``Utils.get_data_vectors(self._datafile, ' ')``;
        column 0 is used as the category label and column 1 as its integer
        tweet count.  The title reports the total number of tweets.

        pdf_file -- destination passed to ``savefig`` with ``format='pdf'``.
        image_filename -- destination passed to ``savefig`` as-is.

        Raises ZeroDivisionError when there are rows but their counts sum
        to zero.
        """
        categories = []
        tweet_count = []
        for data_row in Utils.get_data_vectors(self._datafile, ' '):
            categories.append(data_row[0])
            tweet_count.append(int(data_row[1]))
        figure(1, figsize = (6, 6))
        axes([0.1, 0.1, 0.8, 0.8])

        total_references = sum(tweet_count)
        fracs = [count / float(total_references) for count in tweet_count]
        #  One zero offset per slice, so the chart works for any number of
        #  categories rather than exactly two.
        explode = [0] * len(fracs)
        mpl.rcParams['font.size'] = 12.0
        pie(fracs, explode = explode, labels = categories, autopct = '%1.1f%%', shadow = False, startangle = 0)

        title('Tweet Distribution for ' + str(total_references) + ' tweets.', bbox = {'facecolor':'0.8', 'pad':5})
        savefig(pdf_file, format = 'pdf')
        savefig(image_filename)
        #  Clear the current figure after saving.
        clf()
Пример #3
0
 def plot_data(self, pdf_file, image_filename):
     """Draw a red bar chart of per-bin percentages and save it twice.

     Rows come from ``Utils.get_data_vectors(self._datafile, ' ')``; both
     column 0 (the bin) and column 1 (the count) are parsed as floats and
     truncated to int.  Each bar's height is its count as a percentage of
     the sum of all counts, and bars are laid out in ascending bin order.

     pdf_file -- destination passed to ``P.savefig`` with ``format='pdf'``.
     image_filename -- destination passed to ``P.savefig`` as-is.
     """
     histogram = [
         (int(float(row[0])), int(float(row[1])))
         for row in Utils.get_data_vectors(self._datafile, ' ')
     ]
     histogram.sort(key = itemgetter(0))

     count_total = float(sum(entry[1] for entry in histogram))
     positions = [entry[0] for entry in histogram]
     percentages = [100 * entry[1] / count_total for entry in histogram]

     P.bar(positions, percentages, width = 1, color = 'r')
     #  The font size is changed only after the bars have been drawn.
     mpl.rcParams['font.size'] = 10.0
     P.xlabel('Geotagged tweet percentage')
     P.ylabel('Users percentage')
     P.savefig(pdf_file, format = 'pdf')
     P.savefig(image_filename)
     P.clf()
 def plot_data(self, pdf_file, image_filename):
     """Scatter-plot log(tweet count) against log(user count) and save it.

     Rows come from ``Utils.get_data_vectors(self._datafile, ' ')``; column
     0 is parsed as an integer tweet count and column 1 as an integer user
     count.  Points are processed in ascending tweet-count order and drawn
     as red circles, with log(user count) on the x-axis and
     log(tweet count) on the y-axis.

     pdf_file -- destination passed to ``P.savefig`` with ``format='pdf'``.
     image_filename -- destination passed to ``P.savefig`` as-is.
     """
     samples = [
         (int(row[0]), int(row[1]))
         for row in Utils.get_data_vectors(self._datafile, ' ')
     ]
     samples.sort(key = itemgetter(0))

     #  Take both logs of a point before moving on to the next one.
     log_pairs = [(log(tweets), log(users)) for tweets, users in samples]
     log_tweet_counts = [pair[0] for pair in log_pairs]
     log_user_counts = [pair[1] for pair in log_pairs]

     _, plot_axes = P.subplots()
     plot_axes.plot(log_user_counts, log_tweet_counts, 'ro')
     plot_axes.set_ylabel('log(GeoTweet Count)')
     plot_axes.set_xlabel('log(User Count)')
     P.savefig(pdf_file, format = 'pdf')
     P.savefig(image_filename)
     P.clf()
    def plot_data(self, pdf_file, image_filename):
        """Plot the tweet share of a selection of tweet sources as a pie chart.

        Rows come from ``Utils.get_data_vectors(self._datafile, '|')``;
        column 0 is used as the source (application) name and column 1 as
        its integer tweet count.  A later row with the same name overwrites
        an earlier one.  Slice sizes are relative to the selected sources
        only; the title reports what percentage of ALL tweets the selection
        accounts for.

        pdf_file -- destination passed to ``savefig`` with ``format='pdf'``.
        image_filename -- destination passed to ``savefig`` as-is.

        Raises ZeroDivisionError when the tweet counts sum to zero.
        """
        tweets_by_source = {}
        for data_row in Utils.get_data_vectors(self._datafile, '|'):
            tweets_by_source[data_row[0]] = int(data_row[1])
        figure(1, figsize = (6, 6))
        axes([0.2, 0.2, 0.6, 0.6])

        #  Rank sources by tweet count, largest first.  items() is used
        #  instead of iteritems() so this runs on Python 2 and Python 3.
        ranked_sources = sorted(tweets_by_source.items(), key = itemgetter(1), reverse = True)
        total_tweet_count_original = sum(float(data[1]) for data in ranked_sources)
        #  NOTE(review): this picks ranks 0-4, 10-11, 6-8 and 13-14 (12
        #  entries, in that order) and skips ranks 5, 9 and 12, while the
        #  title below says "top 15".  Presumably hand-tuned for the
        #  original data set; confirm before changing.
        shown_sources = (ranked_sources[0:5] + ranked_sources[10:12] +
                         ranked_sources[6:9] + ranked_sources[13:15])

        total_tweet_count = sum(float(data[1]) for data in shown_sources)
        application_names = [data[0] for data in shown_sources]
        tweet_percentage = 100 * (total_tweet_count / float(total_tweet_count_original))
        fracs = [float(data[1]) / total_tweet_count for data in shown_sources]
        mpl.rcParams['font.size'] = 6.0
        pie(fracs, labels = application_names, autopct = '%1.1f%%', shadow = False, startangle = 0)
        title('Tweet sources Distribution: Showing top 15 tweet sources which generate ' +
              str(tweet_percentage) +
               '% of the total tweets', bbox = {'facecolor':'0.8', 'pad':5})
        savefig(pdf_file, format = 'pdf')
        savefig(image_filename)
        #  Clear the current figure after saving.
        clf()