示例#1
0
文件: setup.py 项目: ericazf/IREngine
	# print(max_page)
	return (max_page)

def crawling():
	"""Run one crawl pass over the Sohu news subject listing.

	Prints a start banner, loads '../config.ini' (read as UTF-8), asks
	get_max_page for the page count of the listing's '.shtml' page, builds
	a news pool with get_news_pool and hands that pool to crawl_news
	together with the output directory and encoding taken from the
	DEFAULT section of the config file.

	Raises:
		KeyError: if 'doc_dir_path' or 'doc_encoding' is absent from the
			DEFAULT section (ConfigParser.read does not fail on a
			missing file, so this is where a bad config path surfaces).
	"""
	print('-----start crawling time: %s-----' % (datetime.today(),))

	settings = configparser.ConfigParser()
	settings.read('../config.ini', 'utf-8')

	base_url = 'http://news.sohu.com/1/0903/61/subject212846158'
	last_page = get_max_page(base_url + '.shtml')

	# NOTE(review): the third argument looks like a lower page bound, so
	# only the newest five listing pages are walked -- confirm against
	# get_news_pool.
	pool = get_news_pool(base_url, last_page, last_page - 5)

	# NOTE(review): the meaning of 140 is defined by crawl_news, which is
	# not visible here -- confirm before changing it.
	defaults = settings['DEFAULT']
	crawl_news(pool, 140, defaults['doc_dir_path'], defaults['doc_encoding'])

if __name__ == "__main__":
	# Script entry point: optionally crawl, then build the index, then
	# compute reading recommendations, printing a timestamp before each
	# stage and once more at the end.
	config_path, config_encoding = '../config.ini', 'utf-8'

	print('-----start time:%s-----' % (datetime.today(),))

	# Stage 1 (disabled): crawl news data. Uncomment to fetch fresh
	# documents before indexing.
	# crawling()

	# Stage 2: build the index.
	print('-----start indexing time: %s-----' % (datetime.today(),))
	im = IndexModule(config_path, config_encoding)
	im.construct_postings_lists()

	# Stage 3: recommended reading.
	print('-----start recommending time: %s-----' % (datetime.today(),))
	rm = RecommendationModule(config_path, config_encoding)
	# NOTE(review): the meaning of the arguments (5, 25) is defined by
	# RecommendationModule.find_k_nearest, which is not visible here.
	rm.find_k_nearest(5, 25)

	print('-----finish time: %s-----' % (datetime.today(),))