def algorithm(df, params):
    """
    wrapper function to put each individual algorithm inside
    :param df: dataframe that contains all the input dataset
    :param params: algorithm specific parameters
    :return: a dictionary of { outputname: output content in memory }
    """
    results = {}

    # run sentiment analysis over the user-selected column
    analyzer = Sentiment(df, params['column'])
    per_sentence, per_document = analyzer.sentiment(params['algorithm'])
    results['sentiment'] = per_sentence
    results['doc'] = per_document

    # vader exposes two extra diagnostics: negation and all-caps emphasis
    if params['algorithm'] == 'vader':
        results['negation'] = analyzer.negated()
        results['allcap'] = analyzer.allcap()

    # pie chart of the document-level sentiment breakdown
    chart_labels = ['negative', 'neutral', 'positive']
    chart_values = [per_document[key] for key in ('neg', 'neu', 'pos')]
    results['div'] = plot.plot_pie_chart(chart_labels, chart_values,
                                         title='Sentiment of the dataset')

    return results
def delete_negative_comment(insta_username):
    """
    Fetch the comments on a user's latest post and classify each one as
    positive or negative with a Naive Bayes sentiment analyzer, then plot
    the positive/negative breakdown.

    NOTE(review): despite its name, this function never issues a delete
    request — it only counts sentiment. The per-comment id needed for a
    real deletion is available in each entry's 'id' field; confirm the
    intended behavior with the API owner.

    :param insta_username: Instagram username whose latest post is inspected
    """
    media_id = get_post_id(insta_username)
    request_url = (BASE_URL + 'media/%s/comments/?access_token=%s') % (
        media_id, APP_ACCESS_TOKEN)
    # Python 3 print() calls — the original Python 2 print statements are a
    # SyntaxError in Python 3, which the rest of this file targets.
    print('GET request url : %s' % request_url)
    comment_info = requests.get(request_url).json()

    no_neg_comm = 0
    no_pos_comm = 0
    if comment_info['meta']['code'] == 200:
        if len(comment_info['data']):
            # Naive sentiment pass over every comment on the post.
            for comment in comment_info['data']:
                blob = TextBlob(comment['text'],
                                analyzer=NaiveBayesAnalyzer())
                if blob.sentiment.p_neg > blob.sentiment.p_pos:
                    no_neg_comm += 1
                else:
                    no_pos_comm += 1
            plot_pie_chart(no_pos_comm, no_neg_comm)
        else:
            print('There are no existing comments on the post!')
    else:
        print('Status code other than 200 received!')
def algorithm(df, params):
    """
    wrapper function to put each individual algorithm inside
    :param df: dataframe that contains all the input dataset
    :param params: algorithm specific parameters
    :return: a dictionary of { outputname: output content in memory }
    """
    output = {}

    # load classification model
    # SECURITY: pickle.load executes arbitrary code embedded in the file —
    # only load pipelines produced by this application, never user uploads.
    with open(params['pipeline'] + ".pickle", 'rb') as f:
        text_clf = pickle.load(f)

    # load data: drop empty/NaN rows of the selected column
    column = params['column']
    data = df[df[column] != ''][column].dropna().astype('str').tolist()

    # predict using trained model
    predicted = text_clf.predict(data)

    # header row followed by one [text, class] row per document
    # (zip replaces the index-based range(len(...)) loop)
    result = [['text', 'class']]
    for text, label in zip(data, predicted):
        result.append([text, label])
    output['predicted'] = result

    # plot percentage of each predicted class
    predicted_counts = Counter(predicted)
    labels = []
    values = []
    for key, count in predicted_counts.items():
        labels.append("class: " + str(key))
        values.append(count)
    output['div'] = plot.plot_pie_chart(
        labels, values, title="break down of the predicted class")
    return output
def algorithm(df, params):
    """
    wrapper function to put each individual algorithm inside
    :param df: dataframe that contains all the input dataset
    :param params: algorithm specific parameters
    :return: a dictionary of { outputname: output content in memory }
    """
    out = {}

    # split the labeled data into training and testing subsets
    classifier = Classification(df, params['column'])
    out['uid'] = params['uid']
    train_rows, test_rows = classifier.split(int(params['ratio']))
    out['training'] = train_rows
    out['testing'] = test_rows

    # pie chart comparing the two subset sizes
    pie_labels = ['training set data points', 'unlabeled data points']
    pie_values = [len(train_rows), len(test_rows)]
    out['div'] = plot.plot_pie_chart(
        pie_labels, pie_values,
        title='breakdown of training vs testing size')

    return out
def algorithm(df=None, params=None):
    """
    wrapper function to put each individual algorithm inside
    :param df: dataframe that contains all the input dataset
    :param params: algorithm specific parameters
    :return: a dictionary of { outputname: output content in memory }
    """
    output = {}

    # user specifies which column to use; each row is one sentence
    column = params['column']
    sentences = df[df[column] != ''][column].dropna().astype('str').tolist()

    entity_list = []      # per-sentence list of (entity, category) pairs
    entity_freq = {}      # entity text -> occurrence count
    entity_category = {}  # category -> occurrence count

    # extract entities in each sentence
    ner = TwitterNER()
    for sentence in sentences:
        tokens = tokenizeRawTweetText(sentence)
        entities = []
        for entry in ner.get_entities(tokens):
            # entry is (start, end, category) — token span plus label
            # (assumed from the indexing below; TODO confirm with TwitterNER docs)
            entity = " ".join(tokens[entry[0]:entry[1]])
            category = entry[2]
            entities.append((entity, category))
            # dict.get replaces the membership-test-then-increment pattern
            entity_freq[entity] = entity_freq.get(entity, 0) + 1
            entity_category[category] = entity_category.get(category, 0) + 1
        entity_list.append(entities)
    output['entity'] = entity_list

    # plot bar chart of most frequent entities
    output['freq'] = entity_freq
    # BUG FIX: the original sliced the first 30 insertion-ordered keys, which
    # are NOT the most frequent ones — sort by count descending before slicing.
    top = sorted(entity_freq.items(), key=lambda kv: kv[1], reverse=True)[:30]
    output['div_freq'] = plot.plot_bar_chart(
        [entity for entity, _ in top],
        [count for _, count in top],
        "Top 30 Most Frequent Name Entities")

    # plot pie chart of entity categories
    output['div_category'] = plot.plot_pie_chart(
        list(entity_category.keys()), list(entity_category.values()),
        "Name Entity Category Breakdowns")
    return output