def getresult(inputdata):
    model = joblib.load('contextIdentifierBangla.pkl')
    df = pd.read_csv("word_weights.csv")
    distinctwords = df['word']
    rwdata = inputdata.splitlines()                        # keep the raw paragraphs for the response
    inputdata = tanvir_stemming.prepare_input(inputdata)   # stemmed/tokenized paragraphs

    # construct one feature vector per paragraph: weighted counts of the
    # known words for each of the four categories
    testdata = []
    for paragraph in inputdata:
        pg = [0] * 4  # [politics, religious, sports, entertainment]
        for k, word in enumerate(distinctwords):
            if word in paragraph:
                cnt = paragraph.count(word)
                pg[0] += cnt * df.at[k, 'politics']
                pg[1] += cnt * df.at[k, 'religious']
                pg[2] += cnt * df.at[k, 'sports']
                pg[3] += cnt * df.at[k, 'entertainment']
        testdata.append(pg)

    # predict a category code per paragraph and build the result list
    res = model.predict(testdata)
    categories = {100: "politics", 200: "religious", 300: "sports"}
    resultes = []  # list of per-paragraph result dicts
    for indxrw, code in enumerate(res):
        category = categories.get(int(code), "entertainment")
        result = {
            "paragraph": rwdata[indxrw],
            "category": category,
            # summarize the raw paragraph using its predicted category
            "summary": summarizer.summary(category, rwdata[indxrw]),
        }
        resultes.append(result)
    return {"results": resultes}
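# Minimal usage sketch (an assumption, not part of the original module): run this
# file directly to classify and summarize a small sample, provided that
# contextIdentifierBangla.pkl, word_weights.csv, tanvir_stemming and summarizer
# are available on the path. The sample_text below is only a placeholder.
if __name__ == "__main__":
    import json

    sample_text = "first paragraph of a news story\nsecond paragraph of a news story"
    output = getresult(sample_text)
    for item in output["results"]:
        print(item["category"], "->", item["summary"])
    print(json.dumps(output, ensure_ascii=False))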
def get_summary():
    global result
    fetched_content = text_entry.get('1.0', tk.END).strip()
    if len(fetched_content) == 0:
        messagebox.showinfo("ERROR", "ENTER TEXT FOR SUMMARIZATION")
    else:
        result = summarizer.summary(fetched_content)
        # clear any previous output, then insert the new summary
        if len(text_entry1.get('1.0', END)) != 0:
            text_entry1.delete('1.0', END)
        text_entry1.insert(INSERT, result)
def conclusion():
    global result
    if len(result) == 0:
        result = ""
    full_content = text_entry3.get('1.0', END).strip()
    if len(full_content) == 0:
        messagebox.showinfo("Warning", "FILE IS EMPTY")
    else:
        result = summarizer.summary(full_content)  # generate the summary
        # clear any previous output, then display the summary
        if len(result_text.get('1.0', END)) != 0:
            result_text.delete('1.0', END)
        result_text.insert(INSERT, result)
def get_text():
    global result
    result = ""
    full_content = ""
    url_text = url.get()  # retrieve the URL text from the entry widget
    if len(url_text) == 0:
        messagebox.showinfo("Warning", "URL NOT ENTERED")
    else:
        html_doc = request.urlopen(url_text)
        info = bs(html_doc, 'html.parser')  # parse the fetched HTML
        contents = info.findAll('p')        # collect all paragraph tags
        for content in contents:            # keep only the text
            full_content += content.text
        result = summarizer.summary(full_content)
        # clear any previous output, then insert the new summary
        if len(text_entry2.get('1.0', END)) != 0:
            text_entry2.delete('1.0', END)
        text_entry2.insert(INSERT, result)
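# Standalone sketch of the fetch-and-extract step used in get_text above,
# assuming only urllib and BeautifulSoup (the example URL is a placeholder).
# It fetches a page and keeps the text of every <p> tag; in the GUI handler
# that text is then passed to summarizer.summary.
from urllib import request
from bs4 import BeautifulSoup


def extract_paragraph_text(page_url):
    html_doc = request.urlopen(page_url)           # fetch the raw HTML
    soup = BeautifulSoup(html_doc, 'html.parser')  # parse it
    # concatenate the text of all paragraph tags
    return "".join(p.text for p in soup.find_all('p'))


# Example (hypothetical URL):
# print(extract_paragraph_text("https://example.com/article")[:200])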
import codecs

from SemanticRoleLabelling import semanticRoleLabel
from summarizer import summary

if __name__ == "__main__":
    # Read the news article
    testFile = codecs.open("testFile.txt", "r", "utf-8")
    testText = testFile.read()

    # Create the semantic role labelling object and extract the
    # {Subject, Action, Object} events from the text
    semantic_role_obj = semanticRoleLabel(inputText=testText)
    list_of_events = semantic_role_obj.get_semantic_roles()

    # Create the summary object and build the summary from the events
    summary_obj = summary()
    event_data = summary_obj.calculate_summary(list_of_events)
def sumdocs(docs, tokenized_sents, offset, line_cnt, doc_id):
    global config, train2emb, test2emb, top_label_assignment, topics, \
        document_phrase_cnt, inverted_index, OUT, comparative_dict, graph_builder

    if config['summ_method'] == 'sumdocs':
        # default: KNN search over the CATE embedding
        if config['comparative_opt'] == 'knn':
            # KNN comparative search, routes 0 and 1: each document embedding votes
            # for the label whose embedding is most cosine-similar; the majority wins
            count = defaultdict(int)
            for doc_agg_emb in test2emb[offset:offset + line_cnt]:
                sim_max = -1
                category = None
                for label in label2emb:
                    sim = 1 - spatial.distance.cosine(doc_agg_emb, label2emb[label])
                    if sim > sim_max:
                        sim_max = sim
                        category = label
                count[category] += 1
            category = max(count.items(), key=operator.itemgetter(1))[0]
            comp_pool = list(map(lambda x: x[0], top_label_assignment[category]))
            all_siblings = topics
            twin_docs = list(map(lambda x: x[0],
                                 top_label_assignment[category][:config['num_siblings']]))
            siblings_docs = [
                list(map(lambda x: x[0],
                         top_label_assignment[l][:config['num_siblings']]))
                for l in all_siblings if l != category
            ]
            comparative_docs = summarizer.compare(config, None, None, None,
                                                  test2emb[offset:offset + line_cnt],
                                                  train2emb, skip_doc=None,
                                                  contain_doc=comp_pool)
            phrase_scores = summarizer.summary(config, docs, siblings_docs, twin_docs,
                                               comparative_docs, document_phrase_cnt,
                                               inverted_index, graph_builder=graph_builder)
    elif config['summ_method'] == 'sumdocs_wo_twins':
        # same label assignment as above, but the comparative documents are dropped
        count = defaultdict(int)
        for doc_agg_emb in test2emb[offset:offset + line_cnt]:
            sim_max = -1
            category = None
            for label in label2emb:
                sim = 1 - spatial.distance.cosine(doc_agg_emb, label2emb[label])
                if sim > sim_max:
                    sim_max = sim
                    category = label
            count[category] += 1
        category = max(count.items(), key=operator.itemgetter(1))[0]
        comp_pool = list(map(lambda x: x[0], top_label_assignment[category]))
        all_siblings = topics
        twin_docs = list(map(lambda x: x[0],
                             top_label_assignment[category][:config['num_siblings']]))
        siblings_docs = [
            list(map(lambda x: x[0],
                     top_label_assignment[l][:config['num_siblings']]))
            for l in all_siblings if l != category
        ]
        category, comparative_docs = '', []
        phrase_scores = summarizer.summary(config, docs, siblings_docs, twin_docs, None,
                                           document_phrase_cnt, inverted_index,
                                           graph_builder=graph_builder)
    elif config['summ_method'] == 'sumdocs_textrank':
        category, comparative_docs = '', []
        phrase_scores = summarizer.summary(config, docs, None, None, None,
                                           document_phrase_cnt, inverted_index,
                                           graph_builder=graph_builder)
    elif config['summ_method'] == 'graph_degen':
        category, comparative_docs = '', []
        phrase_scores = graphdegen(docs)
    else:
        raise ValueError('unknown summ_method: ' + config['summ_method'])

    # select sentences from the scored phrases up to the word limit
    mmr_selector(tokenized_sents, phrase_scores, doc_id, OUT,
                 limits=config['word_limits'])
    return category, comparative_docs
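# Self-contained sketch of the label-assignment step inside sumdocs: every
# document embedding votes for the label whose embedding is most cosine-similar,
# and the majority label wins. The toy vectors below are placeholders; in the
# real code the embeddings come from test2emb and label2emb.
import operator
from collections import defaultdict

from scipy import spatial


def assign_category(doc_embeddings, label_embeddings):
    votes = defaultdict(int)
    for emb in doc_embeddings:
        best_label, best_sim = None, -1.0
        for label, label_emb in label_embeddings.items():
            sim = 1 - spatial.distance.cosine(emb, label_emb)
            if sim > best_sim:
                best_sim, best_label = sim, label
        votes[best_label] += 1
    # return the label with the most votes
    return max(votes.items(), key=operator.itemgetter(1))[0]


# Toy example:
# assign_category([[1, 0], [0.9, 0.1]], {'sports': [1, 0], 'politics': [0, 1]})
# -> 'sports'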
def getTimeline(screen_name):
    today = datetime.datetime.now()
    earlier = today - datetime.timedelta(days=7)

    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    nltk.download('stopwords')

    auth = authenticate()
    api = tweepy.API(auth, wait_on_rate_limit=True)

    all_tweets = []
    count = 1  # starts at 1 so the average below never divides by zero
    truth_value_sum = 0
    try:
        new_tweets = api.user_timeline(screen_name=screen_name,
                                       count=1000,
                                       result_type='recent',
                                       tweet_mode='extended',
                                       since=earlier.strftime("%Y-%m-%d"))
        for tweet in new_tweets:
            # split the timestamp into date and time parts
            created_at = str(tweet.created_at).split(' ')
            date_parts = created_at[0].split('-')
            time_parts = created_at[1].split(':')
            # strip URLs from the tweet text before scoring
            text = re.sub(r'http\S+', '', tweet.full_text)

            create_at = {
                'yyyy': int(date_parts[0]), 'mm': int(date_parts[1]), 'dd': int(date_parts[2]),
                'hh': int(time_parts[0]), 'min': int(time_parts[1]), 'ss': int(time_parts[2]),
            }

            t_value = truthfullness.get_value(text)
            if t_value == -99:
                # the truthfulness model could not be loaded; abort with an error entry
                ret_tweet = {'error': 'Model file not found'}
                all_tweets.append(ret_tweet)
                json_data = json.dumps(ret_tweet, ensure_ascii=False)
                print('Error: Model file not found')
                return json_data
            truth_value_sum += t_value
            count += 1

            about = summarizer.summary(text)
            if len(about) <= 0:
                about.append('Nothing')

            ret_tweet = {
                'text': re.sub('\n', ' ', tweet.full_text),
                'user_name': tweet.user.screen_name,
                'truth_score': t_value,
                'about': about,
                'create_at': create_at,
                'tweet_url': 'https://twitter.com/statuses/' + tweet.id_str,
            }
            all_tweets.append(ret_tweet)
    except Exception:
        ret_tweet = {'error': 'Error, please enter a valid screen name'}
        all_tweets.append(ret_tweet)

    ret_tweets = {
        'avg_t_value': float(truth_value_sum) / count,
        'tweet_list': all_tweets,
    }
    json_data = json.dumps(ret_tweets, ensure_ascii=False)
    return json_data
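# Minimal usage sketch (an assumption, not part of the original module): fetch and
# score the recent timeline of a handle, then read the aggregate truth score and
# per-tweet records back out of the returned JSON string. "example_handle" is a
# placeholder screen name, and valid Twitter credentials are assumed inside
# authenticate().
if __name__ == "__main__":
    timeline_json = getTimeline("example_handle")
    timeline = json.loads(timeline_json)
    if "avg_t_value" in timeline:
        print("average truth score:", timeline["avg_t_value"])
        for tweet in timeline["tweet_list"]:
            if "error" in tweet:
                print("error:", tweet["error"])
            else:
                print(tweet["truth_score"], tweet["text"][:80])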