top_percent=0.3) for res_id, mer_id in mids: # 将mer_id下的文本扔入res_id下的簇,remove mer_id的簇,同时重新计算各簇的特征词, 并计算文本权重, 并去重 temp_infos = event.get_subevent_infos(mer_id) for r in temp_infos: news = News(r["_id"], event.id) news.update_news_subeventid(res_id) event.remove_subevents([mer_id]) if __name__ == '__main__': em = EventManager() event_ids_list = [] # 获取做初始聚类的话题 initial_event_ids = em.getInitializingEventIDs() event_ids_list.extend([(id, True) for id in initial_event_ids]) # 获取已做完初始聚类的活跃话题 active_event_ids = em.checkActive() event_ids_list.extend([(id, False) for id in active_event_ids]) # map并行计算 pool = Pool() pool.map(one_topic_merge, event_ids_list) pool.close() pool.join()
# 如果做计算时出错,更新last_modify, 并将modify_success设置为False print '[Error]: ', e event.setLastmodify(timestamp) event.setModifysuccess(False) if __name__ == '__main__': from bson.objectid import ObjectId # running_ids = [ObjectId("54c4df61d8b487851c2434f6"), ObjectId("54c34b3d2253270fd4dd5598"), \ # running_ids = [ObjectId("54c5105fd8b487851c2434f7"), ObjectId("54c59c19d8b487851c2434f8")] running_ids = [ObjectId("54cb0b472253277627a8ac43")] # running_ids = [ObjectId("54cb259e2253277bca996516")] em = EventManager() event_ids_list = [] # 获取做初始聚类的话题 initial_event_ids = em.getInitializingEventIDs() event_ids_list.extend([(id, True) for id in initial_event_ids if id in running_ids]) # 获取已做完初始聚类的活跃话题 active_event_ids = em.checkActive() event_ids_list.extend([(id, False) for id in active_event_ids if id in running_ids]) # map并行计算 pool = Pool() pool.map(one_topic_calculation, event_ids_list) pool.close() pool.join()