def acquire_topic_id(name, start_ts, end_ts, module="identify"):
    """Return the TopicStatus row id for (name, start_ts, end_ts, module).

    If no row exists yet, a new one is inserted with status -1 (pending),
    stamped with the current time. This is how topics submitted from the
    front end get registered for computation.
    """
    item = db.session.query(TopicStatus).filter_by(
        topic=name, start=start_ts, end=end_ts, module=module).first()
    if not item:
        # No existing row: this is a freshly submitted topic — insert it.
        # BUG FIX: the original passed the undefined name `topic` here
        # instead of the `name` parameter, raising NameError at runtime.
        item = TopicStatus(module, -1, name, start_ts, end_ts, int(time.time()))
        db.session.add(item)
        db.session.commit()
    return item.id
def _update_topic_status2Completed(topic, start, end, db_date, module='identify'):
    """Flip a topic's status from "in calculation" to "completed".

    Deletes the matching IN_CALC_STATUS row (if one exists) and writes a
    fresh COMPLETED_STATUS row for the same (module, topic, start, end,
    db_date) key, committing once at the end.
    """
    completed = TopicStatus(module, COMPLETED_STATUS, topic, start, end, db_date)
    stale = db.session.query(TopicStatus).filter_by(
        module=module,
        status=IN_CALC_STATUS,
        topic=topic,
        start=start,
        end=end,
        db_date=db_date).first()
    if stale:
        db.session.delete(stale)
    db.session.add(completed)
    db.session.commit()
start_ts = datetime2ts(NEWS_START_TS) end_ts = datetime2ts(NEWS_END_TS) # deal with the start_ts/end_ts is not the whole day if start_ts - datetime2ts(ts2datetime(start_ts)) != 0: start_ts = datetime2ts(ts2datetime(start_ts)) if end_ts - datetime2ts(ts2datetime(end_ts)) != 0: end_ts = datetime2ts(ts2datetime(end_ts)) + 3600 * 24 print 'start_ts, end_ts:', start_ts, end_ts db_date = int(time.time()) # 创建topics中得话题 ''' save_t = Topics(topic, start_ts, end_ts) save_t_exist = db.session.query(Topics).filter(Topics.topic==topic ,\ Topics.start_ts==start_ts ,\ Topics.end_ts==end_ts).first() if save_t_exist: db.session.delete(save_t_exist) db.session.add(save_t) db.session.commit() ''' save_t_s = TopicStatus(module, status, topic, start_ts, end_ts, db_date) save_t_s_exist = db.session.query(TopicStatus).filter(TopicStatus.module==module, \ TopicStatus.start==start_ts, \ TopicStatus.end==end_ts).first() if save_t_s_exist: db.session.delete(save_t_s_exist) db.session.add(save_t_s) db.session.commit() main(topic, start_ts, end_ts)
quota_coverage(topic, xapian_search_weibo, start_ts=start, end_ts=end) # 覆盖度计算 print 'save coverage success' quota_person_sensitivity(topic, xapian_search_weibo, start_ts=start, end_ts=end) # 敏感人物参与度 print 'save person_sensitivity success' # 考虑怎么把使用数据相似性很高的合并在一起,减少检索的次数 def worker(topic, start, end): print 'topic: ', topic.encode('utf8'), 'start:', start, 'end:', end cal_topic_quotasystem_count_by_date(topic, start, end) if __name__=='__main__': module = 'quota_sysytem' status = -1 topic = u'东盟,博览会' start = datetime2ts('2013-09-02') end = datetime2ts('2013-09-05') + Day db_date = int(time.time()) # 入库时间 save_item = TopicStatus(module, status, topic, start, end, db_date) db.session.add(save_item) db.session.commit() attention_exp = {'folk':100, 'media':100, 'other':100, 'opinion_leader':100, 'oversea':100} # 此处仅对经验值进行初始化,需要管理员根据具体情况进行修改 duration_exp = 5 * Day coverage_exp = 3000 save_exp(topic, start, end, attention_exp, duration_exp, coverage_exp) # 给关注度经验值和持续度经验值默认值 save_sensitivity(topic, start, end) # 给类型敏感词表、词汇敏感词表、地点敏感词表进行初始化----三张表中均为每个话题一条记录 print 'start_worker' worker(topic, start, end) ComputeIndex(topic, start, end)
if __name__ == '__main__':
    # Manual entry point: register a topic window and start identification.
    status = -1
    module_t_s = MODULE_T_S
    topic = TOPIC
    start = datetime2ts(START)
    end = datetime2ts(END)

    # Upsert the Topics row: drop any existing row for this exact window,
    # then insert the fresh one.
    new_topic = Topics(topic, start, end)
    old_topic = db.session.query(Topics).filter(Topics.topic == topic,
                                                Topics.start_ts == start,
                                                Topics.end_ts == end).first()
    if old_topic:
        db.session.delete(old_topic)
    db.session.add(new_topic)
    db.session.commit()

    # Upsert the TopicStatus row the same way, stamped with the current time.
    stamp = int(time.time())
    new_status = TopicStatus(module_t_s, status, topic, start, end, stamp)
    old_status = db.session.query(TopicStatus).filter(TopicStatus.module == module_t_s,
                                                      TopicStatus.topic == topic,
                                                      TopicStatus.start == start,
                                                      TopicStatus.end == end).first()
    if old_status:
        db.session.delete(old_status)
    db.session.add(new_status)
    db.session.commit()

    main(topic, start, end)