Пример #1
0
def load_subject_weibo_data(topicname=u'APEC-微博', path='apec.jl'):
    """Load the non-subjective weibo items of a topic and save them as news.

    Every line of the file at ``path`` is parsed as one JSON object. Its
    ``text`` value is encoded to UTF-8 and the item is passed through
    ``subob_classifier``. Items whose ``subject`` entry is falsy are
    converted with ``subject_weibo2news`` and stored via ``Event.saveItem``.

    Args:
        topicname: Event name resolved to an id through
            ``EventManager.getEventIDByName``. Defaults to the topic this
            loader was originally hard-coded for.
        path: Path of the JSON-lines dump to read. Defaults to the dump this
            loader was originally hard-coded for.

    Other dumps referenced by earlier revisions of this loader: 'caitai.jl',
    'huge.jl', 'fudan.jl' (presumably paired with the topics u'外滩踩踏-微博',
    u'呼格案-微博', u'复旦投毒案-微博' -- TODO confirm the pairing).
    """
    em = EventManager()
    topicid = em.getEventIDByName(topicname)
    event = Event(topicid)

    # The context manager closes the dump even when parsing, classification
    # or saving raises part-way through; the manual close() did not.
    with open(path) as f:
        for line in f:
            item = json.loads(line.strip())
            item['text'] = item['text'].encode('utf-8')
            item = subob_classifier(item)
            # Only items NOT flagged as subjective are stored as news.
            if not item['subject']:
                news = subject_weibo2news(item)
                event.saveItem(news)
Пример #2
0
def load_subject_weibo_data(topicname=u'APEC-微博', path='apec.jl'):
    """Load the non-subjective weibo items of a topic and save them as news.

    Every line of the file at ``path`` is parsed as one JSON object. Its
    ``text`` value is encoded to UTF-8 and the item is passed through
    ``subob_classifier``. Items whose ``subject`` entry is falsy are
    converted with ``subject_weibo2news`` and stored via ``Event.saveItem``.

    Args:
        topicname: Event name resolved to an id through
            ``EventManager.getEventIDByName``. Defaults to the topic this
            loader was originally hard-coded for.
        path: Path of the JSON-lines dump to read. Defaults to the dump this
            loader was originally hard-coded for.

    Other dumps referenced by earlier revisions of this loader: 'caitai.jl',
    'huge.jl', 'fudan.jl' (presumably paired with the topics u'外滩踩踏-微博',
    u'呼格案-微博', u'复旦投毒案-微博' -- TODO confirm the pairing).
    """
    em = EventManager()
    topicid = em.getEventIDByName(topicname)
    event = Event(topicid)

    # The context manager closes the dump even when parsing, classification
    # or saving raises part-way through; the manual close() did not.
    with open(path) as f:
        for line in f:
            item = json.loads(line.strip())
            item['text'] = item['text'].encode('utf-8')
            item = subob_classifier(item)
            # Only items NOT flagged as subjective are stored as news.
            if not item['subject']:
                news = subject_weibo2news(item)
                event.saveItem(news)
Пример #3
0
def one_topic_clear(topicname_start):
    """Call ``Event.initialize`` for one ``(topic name, start datetime)`` pair.

    Args:
        topicname_start: Two-item sequence ``(topicname, start_datetime)``.
            The name is resolved to an event id through
            ``EventManager.getEventIDByName`` and the datetime is converted
            with ``datetime2ts`` before being passed to ``initialize``.
    """
    name, start = topicname_start

    event_id = EventManager().getEventIDByName(name)
    start_timestamp = datetime2ts(start)

    Event(event_id).initialize(start_timestamp)
Пример #4
0
def one_topic_clear(topicname_start):
    """Call ``Event.initialize`` for one ``(topic name, start datetime)`` pair.

    Args:
        topicname_start: Two-item sequence ``(topicname, start_datetime)``.
            The name is resolved to an event id through
            ``EventManager.getEventIDByName`` and the datetime is converted
            with ``datetime2ts`` before being passed to ``initialize``.
    """
    name, start = topicname_start

    event_id = EventManager().getEventIDByName(name)
    start_timestamp = datetime2ts(start)

    Event(event_id).initialize(start_timestamp)
Пример #5
0
def initializeNewsTopic(topicname=u'外滩踩踏',
                        start_datetime="2015-01-02 00:00:00"):
    """Initialize a news topic.

    Resolves ``topicname`` to an event id, converts ``start_datetime`` with
    ``datetime2ts`` and passes the result to ``Event.initialize``.

    Args:
        topicname: Event name looked up through
            ``EventManager.getEventIDByName``. Defaults to the topic that
            used to be hard-coded here, so calls without arguments behave
            as before.
        start_datetime: Start time as a ``"YYYY-MM-DD HH:MM:SS"`` string
            (format taken from the default value). Defaults to the
            previously hard-coded start.
    """
    em = EventManager()

    topicid = em.getEventIDByName(topicname)
    start_ts = datetime2ts(start_datetime)

    event = Event(topicid)
    event.initialize(start_ts)
Пример #6
0
def initializeNewsTopic(topicname=u'外滩踩踏',
                        start_datetime="2015-01-02 00:00:00"):
    """Initialize a news topic.

    Resolves ``topicname`` to an event id, converts ``start_datetime`` with
    ``datetime2ts`` and passes the result to ``Event.initialize``.

    Args:
        topicname: Event name looked up through
            ``EventManager.getEventIDByName``. Defaults to the topic that
            used to be hard-coded here, so calls without arguments behave
            as before.
        start_datetime: Start time as a ``"YYYY-MM-DD HH:MM:SS"`` string
            (format taken from the default value). Defaults to the
            previously hard-coded start.
    """
    em = EventManager()

    topicid = em.getEventIDByName(topicname)
    start_ts = datetime2ts(start_datetime)

    event = Event(topicid)
    event.initialize(start_ts)
Пример #7
0
def initializeWeiboTopic(topicname=u'APEC-微博',
                         start_datetime="2014-12-15 00:00:00"):
    """Initialize a weibo topic.

    Resolves ``topicname`` to an event id, converts ``start_datetime`` with
    ``datetime2ts`` and passes the result to ``Event.initialize``.

    Args:
        topicname: Event name looked up through
            ``EventManager.getEventIDByName``. Defaults to the topic that
            used to be hard-coded here, so calls without arguments behave
            as before.
        start_datetime: Start time as a ``"YYYY-MM-DD HH:MM:SS"`` string
            (format taken from the default value). Defaults to the
            previously hard-coded start.
    """
    # Topic / start pairs that earlier revisions switched between by editing
    # this function; pass them as arguments instead:
    #   u'外滩踩踏-微博'    "2015-01-02 00:00:00"
    #   u'呼格案-微博'      "2014-12-14 00:00:00"
    #   u'复旦投毒案-微博'  "2014-12-15 00:00:00"
    em = EventManager()

    topicid = em.getEventIDByName(topicname)
    start_ts = datetime2ts(start_datetime)

    event = Event(topicid)
    event.initialize(start_ts)
Пример #8
0
def initializeWeiboTopic(topicname=u'APEC-微博',
                         start_datetime="2014-12-15 00:00:00"):
    """Initialize a weibo topic.

    Resolves ``topicname`` to an event id, converts ``start_datetime`` with
    ``datetime2ts`` and passes the result to ``Event.initialize``.

    Args:
        topicname: Event name looked up through
            ``EventManager.getEventIDByName``. Defaults to the topic that
            used to be hard-coded here, so calls without arguments behave
            as before.
        start_datetime: Start time as a ``"YYYY-MM-DD HH:MM:SS"`` string
            (format taken from the default value). Defaults to the
            previously hard-coded start.
    """
    # Topic / start pairs that earlier revisions switched between by editing
    # this function; pass them as arguments instead:
    #   u'外滩踩踏-微博'    "2015-01-02 00:00:00"
    #   u'呼格案-微博'      "2014-12-14 00:00:00"
    #   u'复旦投毒案-微博'  "2014-12-15 00:00:00"
    em = EventManager()

    topicid = em.getEventIDByName(topicname)
    start_ts = datetime2ts(start_datetime)

    event = Event(topicid)
    event.initialize(start_ts)
Пример #9
0
def load_object_weibo_data(topicname=u'张灵甫遗骨被埋羊圈-微博',
                           path='items_zhang.jl'):
    """Load the subjective weibo items of a topic and save them as comments.

    Every line of the file at ``path`` is parsed as one JSON object. Objects
    without a ``mid`` key are skipped. For the rest, the ``text`` value is
    encoded to UTF-8 and the item is passed through ``subob_classifier``;
    items whose ``subject`` entry is truthy are converted with
    ``api_object_weibo2comment`` and stored via ``EventComments.saveItem``.

    The resolved topic id is printed to stdout before loading starts.

    Args:
        topicname: Event name resolved to an id through
            ``EventManager.getEventIDByName``. Defaults to the topic this
            loader was originally hard-coded for.
        path: Path of the JSON-lines dump to read. Defaults to the dump this
            loader was originally hard-coded for.

    Other dumps referenced by earlier revisions of this loader: 'caitai.jl',
    'huge.jl', 'fudan.jl', 'apec.jl', 'items_qiushi.jl' (presumably paired,
    in that order, with u'外滩踩踏-微博', u'呼格案-微博', u'复旦投毒案-微博',
    u'APEC-微博', u'高校宣传思想工作-微博' -- TODO confirm the pairing).
    """
    em = EventManager()
    topicid = em.getEventIDByName(topicname)
    # Parenthesised so the statement parses on both Python 2 and Python 3;
    # with a single argument the printed output is unchanged.
    print(topicid)
    eventcomment = EventComments(topicid)

    # The context manager closes the dump even when parsing, classification
    # or saving raises part-way through; the manual close() did not.
    with open(path) as f:
        for line in f:
            item = json.loads(line.strip())
            if 'mid' not in item:
                continue

            item['text'] = item['text'].encode('utf-8')
            item = subob_classifier(item)
            # Only items flagged as subjective are stored as comments.
            if item['subject']:
                comment = api_object_weibo2comment(item)
                eventcomment.saveItem(comment)
Пример #10
0
def load_object_weibo_data(topicname=u'张灵甫遗骨被埋羊圈-微博',
                           path='items_zhang.jl'):
    """Load the subjective weibo items of a topic and save them as comments.

    Every line of the file at ``path`` is parsed as one JSON object. Objects
    without a ``mid`` key are skipped. For the rest, the ``text`` value is
    encoded to UTF-8 and the item is passed through ``subob_classifier``;
    items whose ``subject`` entry is truthy are converted with
    ``api_object_weibo2comment`` and stored via ``EventComments.saveItem``.

    The resolved topic id is printed to stdout before loading starts.

    Args:
        topicname: Event name resolved to an id through
            ``EventManager.getEventIDByName``. Defaults to the topic this
            loader was originally hard-coded for.
        path: Path of the JSON-lines dump to read. Defaults to the dump this
            loader was originally hard-coded for.

    Other dumps referenced by earlier revisions of this loader: 'caitai.jl',
    'huge.jl', 'fudan.jl', 'apec.jl', 'items_qiushi.jl' (presumably paired,
    in that order, with u'外滩踩踏-微博', u'呼格案-微博', u'复旦投毒案-微博',
    u'APEC-微博', u'高校宣传思想工作-微博' -- TODO confirm the pairing).
    """
    em = EventManager()
    topicid = em.getEventIDByName(topicname)
    # Parenthesised so the statement parses on both Python 2 and Python 3;
    # with a single argument the printed output is unchanged.
    print(topicid)
    eventcomment = EventComments(topicid)

    # The context manager closes the dump even when parsing, classification
    # or saving raises part-way through; the manual close() did not.
    with open(path) as f:
        for line in f:
            item = json.loads(line.strip())
            if 'mid' not in item:
                continue

            item['text'] = item['text'].encode('utf-8')
            item = subob_classifier(item)
            # Only items flagged as subjective are stored as comments.
            if item['subject']:
                comment = api_object_weibo2comment(item)
                eventcomment.saveItem(comment)
Пример #11
0
from case.time_utils import ts2datetime, ts2date
from xapian_case.xapian_backend import XapianSearch
from xapian_case.utils import cut, load_scws
from case.dynamic_xapian_weibo import getXapianWeiboByTopic
from case.global_config import XAPIAN_USER_DATA_PATH
from case.Database import Event, EventManager
from case.topic_manage import topics_name_start_end
from flask import Blueprint, url_for, render_template, request, abort, flash, session, redirect, make_response

# Module-level objects below are created once, at import time.

# Result of xapian_case.utils.load_scws(); presumably the SCWS word
# segmenter used together with `cut` -- TODO confirm.
scws = load_scws()

# Flask blueprint named 'case'; its URL prefix is '/index'.
mod = Blueprint('case', __name__, url_prefix='/index')

# Result of getXapianWeiboByTopic() called with no arguments.
xapian_search_weibo = getXapianWeiboByTopic()

# EventManager instance held at module level.
em = EventManager()


def acquire_user_by_id(uid):
    user_search = XapianSearch(path=XAPIAN_USER_DATA_PATH,
                               name='master_timeline_user',
                               schema_version=1)
    result = user_search.search_by_id(int(uid),
                                      fields=[
                                          'name', 'location',
                                          'followers_count', 'friends_count',
                                          'profile_image_url'
                                      ])
    user = {}

    if result:
Пример #12
0
                                 top_tfidf_para=10,
                                 top_percent=0.3)

    for res_id, mer_id in mids:
        # 将mer_id下的文本扔入res_id下的簇,remove mer_id的簇,同时重新计算各簇的特征词, 并计算文本权重, 并去重
        temp_infos = event.get_subevent_infos(mer_id)

        for r in temp_infos:
            news = News(r["_id"], event.id)
            news.update_news_subeventid(res_id)

        event.remove_subevents([mer_id])


if __name__ == '__main__':
    # Script entry point: collect (event id, flag) pairs and run
    # one_topic_merge over them in a process pool.
    em = EventManager()
    event_ids_list = []

    # Topics reported by getInitializingEventIDs() (still doing their
    # initial clustering) are paired with True.
    # The loop variable is deliberately not called `id`: on Python 2 a list
    # comprehension leaks its variable into the enclosing (module) scope,
    # which would overwrite the builtin id() for the rest of the module.
    initial_event_ids = em.getInitializingEventIDs()
    event_ids_list.extend(
        [(event_id, True) for event_id in initial_event_ids])

    # Active topics reported by checkActive() (initial clustering already
    # done) are paired with False.
    active_event_ids = em.checkActive()
    event_ids_list.extend(
        [(event_id, False) for event_id in active_event_ids])

    # Parallel map over all collected topics.
    pool = Pool()
    pool.map(one_topic_merge, event_ids_list)
    pool.close()
    pool.join()
Пример #13
0
            event.setModifysuccess(True) # 更新事件的modify_success为True
        except Exception, e:
            # 如果做计算时出错,更新last_modify, 并将modify_success设置为False
            print '[Error]: ', e
            event.setLastmodify(timestamp)
            event.setModifysuccess(False)


if __name__ == '__main__':
    # Script entry point: run one_topic_calculation in a process pool, but
    # only for the event ids whitelisted in running_ids.
    from bson.objectid import ObjectId

    # Whitelists used by earlier runs, kept for reference:
    #   ObjectId("54c4df61d8b487851c2434f6"), ObjectId("54c34b3d2253270fd4dd5598")
    #   ObjectId("54c5105fd8b487851c2434f7"), ObjectId("54c59c19d8b487851c2434f8")
    #   ObjectId("54cb259e2253277bca996516")
    running_ids = [ObjectId("54cb0b472253277627a8ac43")]

    em = EventManager()
    event_ids_list = []

    # Topics reported by getInitializingEventIDs() (still doing their
    # initial clustering) are paired with True.
    # The loop variable is deliberately not called `id`: on Python 2 a list
    # comprehension leaks its variable into the enclosing (module) scope,
    # which would overwrite the builtin id() for the rest of the module.
    initial_event_ids = em.getInitializingEventIDs()
    event_ids_list.extend(
        [(event_id, True) for event_id in initial_event_ids
         if event_id in running_ids])

    # Active topics reported by checkActive() (initial clustering already
    # done) are paired with False.
    active_event_ids = em.checkActive()
    event_ids_list.extend(
        [(event_id, False) for event_id in active_event_ids
         if event_id in running_ids])

    # Parallel map over all selected topics.
    pool = Pool()
    pool.map(one_topic_calculation, event_ids_list)
    pool.close()
    pool.join()
Пример #14
0
#-*-coding=utf-8-*-
# User: linhaobuaa
# Date: 2015-01-02 12:00:00
# Version: 0.1.0
"""处理演化过程中出错的话题
"""

from Database import EventManager, Event
from run import one_topic_calculation


def handle_error(eventid):
    """Rewind an event's last-modify value by 3600 and mark it successful.

    Reads the current value with ``Event.getLastmodify``, writes back that
    value minus 3600 (presumably seconds, i.e. one hour -- TODO confirm the
    unit) and sets the modify-success flag to ``True``.

    Args:
        eventid: Identifier handed to the ``Event`` constructor.
    """
    target = Event(eventid)
    rewound = target.getLastmodify() - 3600

    target.setLastmodify(rewound)
    target.setModifysuccess(True)

    # Follow-up recalculation, currently disabled:
    # one_topic_calculation((eventid, False))


if __name__ == '__main__':
    # Script entry point: apply handle_error to every event id.
    em = EventManager()
    # Alternative source used previously: em.getFalseEventIDs()
    event_ids_list = em.getAllEventIDs()

    # Explicit loop instead of map(): map() used purely for side effects
    # builds a throwaway list on Python 2 and, being lazy on Python 3,
    # would never call handle_error at all.
    for eventid in event_ids_list:
        handle_error(eventid)