def performComputation(data):
    """Build the visualisation artifacts for a batch of classified tweets.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain at least the columns ``tweetText``, ``sentiment``,
        ``weight`` and ``timeCreated``.

    Returns
    -------
    tuple
        ``(script_bar, div_bar, data, piedata, freq_json,
        no_positive_tweets, no_negative_tweets)`` — the Bokeh bar-chart
        embed components, the trimmed DataFrame, the Google-Charts pie
        data, a JSON word-frequency map for the word cloud, and the two
        sentiment counts as plain ints.

    On any exception, logs the error and returns the rendered
    ``error.html`` template instead (caller is expected to be a Flask
    view that can handle either shape).
    """
    try:
        # Keep only the columns the charts need.
        data = data[['tweetText', 'sentiment', 'weight', 'timeCreated']]
        sentiment_valcounts = data['sentiment'].value_counts(sort=False)

        # --- Sentiment bar chart (Bokeh) ---
        bar_plot = Bar(sentiment_valcounts, title='Tweets per Sentiment',
                       height=450, xlabel='Sentiment', ylabel='Count')
        print(sentiment_valcounts)
        # components() yields the <script>/<div> pair for template embedding.
        script_bar, div_bar = components(bar_plot)

        # --- Pie/donut chart (Google Charts API) ---
        # NOTE(review): assumes exactly two sentiment classes (positive at
        # position 0, negative at position 1) — confirm against the
        # classifier's label set.
        sentiment_labels = list(sentiment_valcounts.to_frame().index)
        # int(...) converts numpy int64 to a JSON-serialisable plain int
        # (np.asscalar was deprecated and has been removed from NumPy).
        piedata = [(sentiment_labels[0], int(sentiment_valcounts.iloc[0])),
                   (sentiment_labels[1], int(sentiment_valcounts.iloc[1]))]
        no_positive_tweets = int(sentiment_valcounts.iloc[0])
        no_negative_tweets = int(sentiment_valcounts.iloc[1])

        # --- Word cloud frequencies ---
        stop = act.getStopWordList()
        tokens = []
        for txt in data['tweetText'].values:
            # Lowercase and strip trailing/leading punctuation per token.
            tokens.extend(t.lower().strip(":,.") for t in txt.split())
        filtered_tokens = [w for w in tokens if w not in stop]
        # Word -> count, most frequent first; keep the top 30 for display.
        # (.iloc replaces the removed .ix, which sliced positionally here.)
        freq = pd.Series(filtered_tokens).value_counts().iloc[0:30]
        freq_json = freq.to_json()

        return (script_bar, div_bar, data, piedata, freq_json,
                no_positive_tweets, no_negative_tweets)
    except Exception as e:
        # Best-effort error page; broad catch is the view-level boundary.
        print(e)
        return render_template('error.html', error=e)