示例#1
0
                                 top_percent=0.3)

    for res_id, mer_id in mids:
        # 将mer_id下的文本扔入res_id下的簇,remove mer_id的簇,同时重新计算各簇的特征词, 并计算文本权重, 并去重
        temp_infos = event.get_subevent_infos(mer_id)

        for r in temp_infos:
            news = News(r["_id"], event.id)
            news.update_news_subeventid(res_id)

        event.remove_subevents([mer_id])


if __name__ == '__main__':
    # Script entry point: collect the events to process and fan them out to
    # one_topic_merge through a multiprocessing pool.
    em = EventManager()

    # Work items are (event_id, flag) tuples: flag is True for ids returned by
    # getInitializingEventIDs() and False for ids returned by checkActive().
    # NOTE(review): presumably the flag tells one_topic_merge whether the
    # event is still in its initial-clustering phase -- confirm against it.
    event_ids_list = []

    # Events that still have to go through initial clustering.
    # The loop variable is deliberately not named `id`: that shadows the
    # builtin, and under Python 2 a list-comprehension variable leaks into the
    # enclosing module scope, rebinding id() for everything that runs later.
    initial_event_ids = em.getInitializingEventIDs()
    event_ids_list.extend([(event_id, True) for event_id in initial_event_ids])

    # Active events whose initial clustering has already been done.
    active_event_ids = em.checkActive()
    event_ids_list.extend([(event_id, False) for event_id in active_event_ids])

    # Process all collected events in parallel, then wait for the workers.
    pool = Pool()
    pool.map(one_topic_merge, event_ids_list)
    pool.close()
    pool.join()
示例#2
0
            # 如果做计算时出错,更新last_modify, 并将modify_success设置为False
            print '[Error]: ', e
            event.setLastmodify(timestamp)
            event.setModifysuccess(False)


if __name__ == '__main__':
    # Script entry point: run one_topic_calculation in parallel, but only for
    # events whose id is listed in the hard-coded running_ids below.
    from bson.objectid import ObjectId
    # Alternative id selections kept for reference:
    # running_ids = [ObjectId("54c4df61d8b487851c2434f6"), ObjectId("54c34b3d2253270fd4dd5598"), \
    # running_ids = [ObjectId("54c5105fd8b487851c2434f7"), ObjectId("54c59c19d8b487851c2434f8")]
    # running_ids = [ObjectId("54cb259e2253277bca996516")]
    running_ids = [ObjectId("54cb0b472253277627a8ac43")]

    em = EventManager()

    # Work items are (event_id, flag) tuples: flag is True for ids returned by
    # getInitializingEventIDs() and False for ids returned by checkActive().
    # NOTE(review): presumably the flag tells one_topic_calculation whether
    # the event is still in its initial-clustering phase -- confirm.
    event_ids_list = []

    # Events that still have to go through initial clustering, limited to
    # running_ids.
    # The loop variable is deliberately not named `id`: that shadows the
    # builtin, and under Python 2 a list-comprehension variable leaks into the
    # enclosing module scope, rebinding id() for everything that runs later.
    initial_event_ids = em.getInitializingEventIDs()
    event_ids_list.extend([(event_id, True) for event_id in initial_event_ids
                           if event_id in running_ids])

    # Active events whose initial clustering has already been done, limited
    # to running_ids.
    active_event_ids = em.checkActive()
    event_ids_list.extend([(event_id, False) for event_id in active_event_ids
                           if event_id in running_ids])

    # Process all selected events in parallel, then wait for the workers.
    pool = Pool()
    pool.map(one_topic_calculation, event_ids_list)
    pool.close()
    pool.join()