class URLSummarizer(object):
	"""Fetch a web page and summarize the text of its paragraphs.

	The page is downloaded with ``requests``, parsed with ``BeautifulSoup``
	and condensed by the ``FrequencySummarizer`` instance kept in ``self.fs``.
	"""

	# Title of the most recently fetched page. Stays "" until a page has been
	# fetched, and is reset to "" when the fetched page has no <title>.
	page_title = ""

	def __init__(self):
		"""Create the summarizer that all later calls share."""
		self.fs = FrequencySummarizer()

	def getTextFromURL(self, url):
		"""Download ``url`` and return the text of its ``<p>`` elements.

		The texts of all ``<p>`` elements are joined with single spaces.
		As a side effect the page's ``<title>`` text is stored in
		``self.page_title`` (empty string when the page has no title).

		url: address of the page to fetch. The request is sent through the
			module-level ``proxies`` setting, which is defined outside this
			class.
		"""
		response = requests.get(url, proxies=proxies)
		soup = BeautifulSoup(response.text, "html.parser")
		# soup.find() returns None when the document has no <title>; fall back
		# to an empty title instead of failing with AttributeError.
		title_tag = soup.find("title")
		self.page_title = title_tag.text if title_tag is not None else ""
		return ' '.join(p.text for p in soup.find_all('p'))

	def summarizeURL(self, url, total_sent):
		"""Return a summary of the page at ``url`` as one string.

		url: Site url
		total_sent: forwarded unchanged as the second argument of
			``self.fs.summarize`` (number of sentences to return)

		The items returned by the summarizer are joined with single spaces.
		"""
		page_text = self.getTextFromURL(url)
		# Every "Â" and "â" character is dropped before summarizing.
		# NOTE(review): presumably these are leftovers of mis-decoded text;
		# legitimate uses of those letters are removed as well - confirm.
		page_text = page_text.replace(u"Â", u"").replace(u"â", u"")
		# Newlines are turned into spaces before the text is summarized.
		final_summary = self.fs.summarize(page_text.replace("\n", " "), total_sent)
		return " ".join(final_summary)

	def get_keywords(self, num_words=10):
		"""Return whatever ``self.fs.keywords(num_words)`` yields.

		num_words: value handed to the summarizer's ``keywords`` method
			(defaults to 10).
		"""
		return self.fs.keywords(num_words)

	def get_page_title(self):
		"""Return the title stored by the last ``getTextFromURL`` call."""
		return self.page_title
Exemplo n.º 2
0
def summarizeURL(url, total_pars):
    """Print a summary of the page at ``url``, one item at a time.

    The page text comes from ``getTextFromURL``; every "Â" and "â" character
    is removed and newlines become spaces before the text is handed to a new
    ``FrequencySummarizer``. ``total_pars`` is forwarded unchanged as the
    second argument of ``summarize``.

    Each summary item is printed after a "*" marker and is followed by a
    separator line. Nothing is returned.
    """
    separator = "<----------------------------------------------------------------->"

    cleaned = getTextFromURL(url)
    for unwanted in (u"Â", u"â"):
        cleaned = cleaned.replace(unwanted, u"")

    summarizer = FrequencySummarizer()
    sentences = summarizer.summarize(cleaned.replace("\n", " "), total_pars)

    for sentence in sentences:
        print("*",sentence)
        print(separator)
Exemplo n.º 3
0
    def summary(self, num_news):
        """Summarize the distinct entries of ``self.df['text']``.

        Entries are taken in column order; an entry equal to one already seen
        is skipped. The remaining entries are concatenated, each preceded by
        a single space, and the resulting string is passed to a new
        ``FrequencySummarizer``.

        num_news: forwarded unchanged as the second argument of
            ``FrequencySummarizer.summarize``.

        Returns whatever ``summarize`` returns.
        """
        seen = set()
        unique_texts = []
        # Iterate over the column's values directly: this does not rely on the
        # frame having a 0..n-1 index (``df['text'][i]`` is a label lookup)
        # and avoids the Python-2-only ``xrange``.
        for text in self.df['text']:
            if text in seen:
                continue
            seen.add(text)
            unique_texts.append(text)

        # Same string the incremental concatenation produced (every entry is
        # prefixed with one space), built in a single pass.
        news = "".join(" " + text for text in unique_texts)

        fs = FrequencySummarizer()
        return fs.summarize(news, num_news)
Exemplo n.º 4
0
def news():
    """Run the news spider and build an HTML "breaking news" snippet.

    Steps:
      1. Run ``scrapy runspider news_spider.py -o news.csv`` through the
         shell (the exit status is not checked).
      2. Read ``news.csv`` and collect the distinct values of its ``text``
         column, in file order.
      3. Summarize the concatenated texts with ``FrequencySummarizer``,
         asking for 10 items.
      4. Wrap the result in HTML: a marquee heading followed by one ``<p>``
         per summary item.

    The HTML string is printed and also returned.
    """
    command = "scrapy runspider news_spider.py -o news.csv"
    os.system(command)
    df = pd.read_csv("news.csv")

    seen = set()
    unique_texts = []
    # Iterate over the column's values instead of indexing by position with
    # the Python-2-only ``xrange``; duplicates are skipped.
    for text in df['text']:
        if text in seen:
            continue
        seen.add(text)
        unique_texts.append(text)

    # Every entry is prefixed with one space, exactly as the incremental
    # concatenation did, but the string is built in a single pass.
    news_text = "".join(" " + text for text in unique_texts)

    fs = FrequencySummarizer()
    final_summary = fs.summarize(news_text, 10)

    parts = ["<marquee behavior='scroll' bgcolor='yellow' style='border:solid;'> <h1> Today's Breaking News! <br/> </h1></marquee>"]
    # NOTE(review): the summary items come from scraped pages and are placed
    # into the markup unescaped; escape them if this HTML is ever served to a
    # browser.
    for sentence in final_summary:
        parts.append('<p style="color:red; font-family:garamond"><b>' + sentence + "</b><br/></p>")
    s = "".join(parts)

    # Function-call form works as a statement on Python 2 and as a call on
    # Python 3, printing the same text in both.
    print(s)
    return s
Exemplo n.º 5
0
def summarizeURL(url, total_pars):
    """Return a summary of the page at ``url`` as a single string.

    The text obtained from ``getTextFromURL`` has every "Â" and "â"
    character removed and its newlines replaced by spaces. A new
    ``FrequencySummarizer`` then summarizes it, with ``total_pars`` forwarded
    unchanged as the second argument. The summary items are joined with
    single spaces.
    """
    page_text = getTextFromURL(url)
    for unwanted in (u"Â", u"â"):
        page_text = page_text.replace(unwanted, u"")

    summarizer = FrequencySummarizer()
    single_line = page_text.replace("\n", " ")
    sentences = summarizer.summarize(single_line, total_pars)
    return " ".join(sentences)
	def __init__(self):
		"""Create the ``FrequencySummarizer`` instance kept in ``self.fs``."""
		self.fs = FrequencySummarizer()