Пример #1
0
@author: hao
'''
import os, sys
from sys import path
path.append(path[0] + '/tools/')
import MySQLdb
import pymongo
import jieba.analyse
import time
import datetime
# 工具类
from remove import removeIrrelevant
reload(sys)
sys.setdefaultencoding('utf8')

rmIrr = removeIrrelevant()


def addTwitterTable(mongodbIP,
                    mysqlhostIP,
                    mysqlUserName='******',
                    mysqlPassword='******',
                    dbname='cctvTimer'):

    sqlConn = MySQLdb.connect(host=mysqlhostIP,
                              user=mysqlUserName,
                              passwd=mysqlPassword,
                              db=dbname,
                              charset='utf8')
    sqlcursor = sqlConn.cursor()
Пример #2
0
def addcustomerEvaluation_informal(baseurl,
                                   mysqlhostIP,
                                   mysqlUserName='******',
                                   mysqlPassword='',
                                   dbname='btv_v2'):

    # 读停用词
    path = os.path.abspath(os.path.dirname(sys.argv[0]))
    dicFile = open(path + '/tools/NTUSD_simplified/stopwords.txt', 'r')
    stopwords = dicFile.readlines()
    stopwordList = []
    stopwordList.append(' ')
    for stopword in stopwords:
        temp = stopword.strip().replace('\r\n', '').decode('utf8')
        stopwordList.append(temp)
    dicFile.close()

    # 连接数据库
    sqlConn = MySQLdb.connect(host=mysqlhostIP,
                              user=mysqlUserName,
                              passwd=mysqlPassword,
                              db=dbname,
                              charset='utf8')
    sqlcursor = sqlConn.cursor()

    sqlcursor.execute(
        '''CREATE TABLE IF NOT EXISTS customer_evaluation(pk bigint NOT NULL PRIMARY KEY AUTO_INCREMENT, flag int(1), evaluation bigint(20), content varchar(200), date Date,
                    program_id varchar(200), program varchar(200)) DEFAULT CHARSET=utf8;'''
    )
    print '新建库成功'
    # 时间属性
    # inter为0,即为当日
    # 库中是2.29
    inter = 1
    now = int(time.time()) - 86400 * inter
    timeArray = time.localtime(now)
    otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    print otherStyleTime
    # **********new by Ivy**********************************************
    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    tablename_index = "DATA:WEIBO_POST_Keywords_INDEX"
    prefix = otherStyleTime + "*"
    # print baseurl + "/" + tablename_index + "/" + prefix
    r_index = requests.get(baseurl + "/" + tablename_index + "/" + prefix,
                           auth=kerberos_auth,
                           headers={"Accept": "application/json"})
    if issuccessful(r_index) == False:
        print "Could not get messages from HBase. Text was:\n" + r_index.text
    # print base64.b64decode(r.text)
    # print r_index.text
    bleats_0 = json.loads(r_index.text)
    row_key_name_list = list()
    for row in bleats_0['Row']:
        row_key = base64.b64decode(row['key'])
        row_key_name = row_key.split("-")[3]
        # print row_key, row_key_name
        row_key_name_list.append(row_key_name)
    print len(row_key_name_list)
    # **********new by Ivy**********************************************
    # 存储评论数据
    # 处理情感
    emProcess = emotionProcess()
    rmIrr = removeIrrelevant()
    sqlcursor.execute(
        "SELECT DISTINCT(program_name) from ini_app_program_source_rel")
    # sqlcursor.execute("SELECT program_name from ini_app_program_source_rel where program_name = '跨界喜剧王' limit 1;")
    bufferTemp = sqlcursor.fetchall()
    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    # tablename = "DATA:WEIBO_POST_Keywords"
    # sqlcursor.execute('''select source_path from ini_app_program_source_rel where program_name = '军情解码' and app_name = '观众口碑';''')
    for one_program in bufferTemp:
        commentsData = []
        tempData = []
        one_program = one_program[0].encode('utf8')
        if one_program == '中国好歌曲第三季':
            otherStyleTime = '2015-10-26'

        # print one_program
        emotionsWord = []
        emotionsScore = 0
        count = 0
        printCount = 0
        sqlcursor.execute(
            """select source_path from ini_app_program_source_rel where app_name = '观众口碑' and program_name = %s""",
            (one_program, ))
        source_path = sqlcursor.fetchone()
        tablename = source_path[0]
        # print one_program,"source_path", source_path
        print one_program
        sqlcursor.execute(
            '''SELECT program_id from ini_app_program_source_rel where program_name = %s limit 1;''',
            (one_program, ))
        bufferTemp = sqlcursor.fetchone()
        program_id = bufferTemp[0]
        # **********new by Ivy**********************************************
        for single_rowkey in row_key_name_list:
            r = requests.get(baseurl + "/" + tablename + "/" + single_rowkey,
                             auth=kerberos_auth,
                             headers={"Accept": "application/json"})
            if issuccessful(r) == False:
                print "Could not get messages from HBase. Text was:\n" + r.text
            # print r.text
            bleats = json.loads(r.text)
            # **********new by Ivy**********************************************
            # print program_id
            # bleats is json file
            for row in bleats['Row']:
                flag = True
                for cell in row['Cell']:
                    columnname = base64.b64decode(cell['column'])
                    value = cell['$']
                    if value == None:
                        print 'none'
                        continue
                    if columnname == "base_info:match":
                        column = base64.b64decode(value)
                        if (one_program not in column):
                            flag = False
                            break
                    if columnname == "base_info:cdate":
                        cdate = base64.b64decode(value)
                        cdate = cdate.split('T')[0]
                        # if cdate != otherStyleTime:
                        #     flag = False
                        #     break
                    if columnname == "base_info:text":
                        content = base64.b64decode(value)
                        # print 'hh', content
                if flag:
                    # print column, cdate, content
                    # 暂时没有program_id
                    # program_id = data['base_info:program_id']
                    count += 1
                    printCount += 1
                    # 处理每一条
                    #     情感关键词
                    (emotionsWord, emotionsScore) = emProcess.processSentence(
                        rmIrr.removeEverythingButEmotion(content))
                    #     倾向性判断flag:1是正面,0是中性,-1是负面

                    #     情感极性判断,这里我限制了更严格的条件
                    if emotionsScore > 0:
                        tempData.append('1')
                    elif emotionsScore == 0:
                        tempData.append('0')
                    elif emotionsScore < 0:
                        tempData.append('-1')
            #         情感得分sentimentScore
                    tempData.append(emotionsScore)
                    # 评论内容
                    tempData.append(content)
                    #     日期时间
                    otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
                    tempData.append(otherStyleTime)
                    # 栏目id
                    tempData.append(program_id)
                    # 栏目名称
                    tempData.append(one_program)
                    #     转换为元组

                    sqlcursor.execute(
                        '''insert into customer_evaluation(flag, evaluation, content, date, program_id, program)
                                values (%s, %s, %s, %s, %s, %s)''', tempData)
                    sqlConn.commit()
                    tempData = []
    sqlConn.close()
Пример #3
0
def mentioned_trend(baseurl,
                    mysqlhostIP,
                    mysqlUserName='******',
                    mysqlPassword='',
                    dbname='btv'):
    # 连接数据库
    sqlConn = MySQLdb.connect(host=mysqlhostIP,
                              user=mysqlUserName,
                              passwd=mysqlPassword,
                              db=dbname,
                              charset='utf8')
    sqlcursor = sqlConn.cursor()
    sqlcursor.execute(
        '''CREATE TABLE IF NOT EXISTS gala_discussion(pk bigint NOT NULL PRIMARY KEY AUTO_INCREMENT, disscussion bigint(50), specific_time datetime, neutral double, positive double, program_id varchar(50), program varchar(50)) DEFAULT CHARSET=utf8;'''
    )
    print '新建库成功'
    emProcess = emotionProcess()
    rmIrr = removeIrrelevant()

    # 连接hbase数据库
    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    # 表名
    tablename = "DATA:WEIBO_POST_Keywords"
    r = requests.get(baseurl + "/" + tablename + "/*",
                     auth=kerberos_auth,
                     headers={"Accept": "application/json"})
    if issuccessful(r) == False:
        print "Could not get messages from HBase. Text was:\n" + r.text
    quit()
    bleats = json.loads(r.text)

    tempData = []
    positive_count = 0
    neutral_count = 0
    total_count = 0
    # 时间属性
    # inter为0,即为当日
    inter = 0
    now = int(time.time()) - 86400 * inter
    timeArray = time.localtime(now)
    otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    print otherStyleTime

    # bleats is json file
    for row in bleats['Row']:
        for cell in row['Cell']:
            columnname = base64.b64decode(cell['column'])
            value = cell['$']
            if value == None:
                print 'none'
                continue
            if columnname == "base_info:match":
                column = base64.b64decode(value)
                if column == "春晚":
                    if columnname == "'base_info:text'":
                        content = base64.b64decode(value)
                        if columnname == "'base_info:cdate'":
                            cdate = base64.b64decode(value)
                            ddate = cdate
                            ddate = ddate.split(' ')[0]
                            if ddate == otherStyleTime:
                                cdate = str(cdate) + ':00'
                                # cdate = cdate.split(' ')[0]
                                total_count += 1
                                (emotionsWord,
                                 emotionsScore) = emProcess.processSentence(
                                     rmIrr.removeEverythingButEmotion(content))
                                #     倾向性判断flag:1是正面,0是中性,-1是负面
                                #     情感极性判断,这里我限制了更严格的条件
                                if emotionsScore > 0:
                                    positive_count += 1
                                elif emotionsScore == 0:
                                    neutral_count += 1
    print "po", positive_count, 'ne', neutral_count, "total", total_count
    positive_count = float(positive_count) / float(total_count)
    neutral_count = float(neutral_count) / float(total_count)
    print 'tt', type(neutral_count), neutral_count
    # disscussion
    tempData.append(total_count)
    # specific_time???需要修改
    tempData.append(cdate)
    # neutral
    tempData.append(neutral_count)
    # positive
    tempData.append(positive_count)
    # program_id
    tempData.append('100')
    # program
    tempData.append('2016年北京卫视春节联欢晚会')
    sqlcursor.execute(
        '''insert into gala_discussion(disscussion, specific_time, neutral, positive, program_id, program) values (%s, %s, %s, %s, %s, %s)''',
        tempData)
    sqlConn.commit()
    tempData = []
    sqlConn.close()
Пример #4
0
def addcustomerEvaluation_informal(baseurl,
                                   mysqlhostIP,
                                   mysqlUserName='******',
                                   mysqlPassword='',
                                   dbname='btv'):

    # 读停用词
    path = os.path.abspath(os.path.dirname(sys.argv[0]))
    dicFile = open(path + '/tools/NTUSD_simplified/stopwords.txt', 'r')
    stopwords = dicFile.readlines()
    stopwordList = []
    stopwordList.append(' ')
    for stopword in stopwords:
        temp = stopword.strip().replace('\r\n', '').decode('utf8')
        stopwordList.append(temp)
    dicFile.close()

    # 连接数据库
    sqlConn = MySQLdb.connect(host=mysqlhostIP,
                              user=mysqlUserName,
                              passwd=mysqlPassword,
                              db=dbname,
                              charset='utf8')
    sqlcursor = sqlConn.cursor()

    sqlcursor.execute(
        '''CREATE TABLE IF NOT EXISTS customer_evaluation(pk bigint NOT NULL PRIMARY KEY AUTO_INCREMENT, flag int(1), evaluation bigint(20), content varchar(200), date Date,
                    program_id varchar(200), program varchar(200)) DEFAULT CHARSET=utf8;'''
    )
    print '新建库成功'
    # 时间属性
    # inter为0,即为当日
    # 库中是2.29
    inter = 37
    now = int(time.time()) - 86400 * inter
    timeArray = time.localtime(now)
    otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    print otherStyleTime
    # 存储评论数据
    source_1 = 'weibo'
    # 处理情感
    emProcess = emotionProcess()
    rmIrr = removeIrrelevant()
    # 首先获取栏目,注意栏目及相关hbase存储信息有更新,请在hbase_info做同步
    # print 'SELECT DISTINCT(program) from hbase_info where source = %s' %source_1
    sqlcursor.execute("SELECT DISTINCT(program) from hbase_info;")
    bufferTemp = sqlcursor.fetchall()
    # print len(bufferTemp)

    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    tablename = "DATA:WEIBO_POST_Keywords"
    r = requests.get(baseurl + "/" + tablename + "/*",
                     auth=kerberos_auth,
                     headers={"Accept": "application/json"})
    if issuccessful(r) == False:
        print "Could not get messages from HBase. Text was:\n" + r.text
    # quit()
    bleats = json.loads(r.text)

    for one_program in bufferTemp:
        commentsData = []
        tempData = []
        one_program = one_program[0].encode('utf8')
        print type(one_program), one_program
        emotionsWord = []
        emotionsScore = 0
        count = 0
        printCount = 0
        sqlcursor.execute(
            '''SELECT program_id from competition_analysis where program = %s''',
            (one_program, ))
        bufferTemp = sqlcursor.fetchone()
        program_id = bufferTemp[0]
        print program_id
        # row_prefix, limit可以限定次数
        # bleats is json file
        for row in bleats['Row']:
            for cell in row['Cell']:
                columnname = base64.b64decode(cell['column'])
                value = cell['$']
                if value == None:
                    print 'none'
                    continue
                if columnname == "base_info:match":
                    column = base64.b64decode(value)
                if column == one_program:
                    if columnname == "'base_info:cdate'":
                        date_created = base64.b64decode(value)
                        if date_created == otherStyleTime:
                            if columnname == "'base_info:text'":
                                content = base64.b64decode(value)
                                print 'q', content
                                # 暂时没有program_id
                                # program_id = data['base_info:program_id']
                                count += 1
                                printCount += 1
                                # 处理每一条
                                #     情感关键词
                                (emotionsWord,
                                 emotionsScore) = emProcess.processSentence(
                                     rmIrr.removeEverythingButEmotion(content))
                                #     倾向性判断flag:1是正面,0是中性,-1是负面

                                #     情感极性判断,这里我限制了更严格的条件
                                if emotionsScore > 0:
                                    tempData.append('1')
                                elif emotionsScore == 0:
                                    tempData.append('0')
                                elif emotionsScore < 0:
                                    tempData.append('-1')
                        #         情感得分sentimentScore
                                tempData.append(emotionsScore)
                                # 评论内容
                                tempData.append(content)
                                #     日期时间
                                tempData.append(otherStyleTime)
                                # 栏目id
                                tempData.append(program_id)
                                # 栏目名称
                                tempData.append(one_program)
                                #     转换为元组
                                commentsData.append(tuple(tempData))
                                tempData = []
                                sqlcursor.execute(
                                    '''insert into customer_evaluation(flag, evaluation, content, date, program_id, program)
                                            values (%s, %s, %s, %s, %s, %s)''',
                                    commentsData)
                                sqlConn.commit()
                                commentsData = []
    sqlConn.close()
Пример #5
0
def addcustomerEvaluation_informal(baseurl,
                                   mysqlhostIP,
                                   mysqlUserName='******',
                                   mysqlPassword='',
                                   dbname='btv_v2'):

    # 读停用词
    path = os.path.abspath(os.path.dirname(sys.argv[0]))
    dicFile = open(path + '/tools/NTUSD_simplified/stopwords.txt', 'r')
    stopwords = dicFile.readlines()
    stopwordList = []
    stopwordList.append(' ')
    for stopword in stopwords:
        temp = stopword.strip().replace('\r\n', '').decode('utf8')
        stopwordList.append(temp)
    dicFile.close()

    # 连接数据库
    sqlConn = MySQLdb.connect(host=mysqlhostIP,
                              user=mysqlUserName,
                              passwd=mysqlPassword,
                              db=dbname,
                              charset='utf8')
    sqlcursor = sqlConn.cursor()

    sqlcursor.execute(
        '''CREATE TABLE IF NOT EXISTS vedio_index(pk bigint NOT NULL PRIMARY KEY AUTO_INCREMENT, total_index bigint(20), play_count bigint(20), comment_count bigint(20), date Date,
                    program_id varchar(200), program varchar(200)) DEFAULT CHARSET=utf8;'''
    )
    print '新建库成功'
    # 时间属性
    # inter为0,即为当日
    # 库中是2.29
    inter = 1
    now = int(time.time()) - 86400 * inter
    timeArray = time.localtime(now)
    otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    print otherStyleTime
    # **********new by Ivy**********************************************
    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    tablename_index = "DATA:VIDEO_ZONGYI_INDEX"
    prefix = otherStyleTime + "*"
    # print baseurl + "/" + tablename_index + "/" + prefix
    r_index = requests.get(baseurl + "/" + tablename_index + "/" + prefix,
                           auth=kerberos_auth,
                           headers={"Accept": "application/json"})
    if issuccessful(r_index) == False:
        print "Could not get messages from HBase table VIDEO_ZONGYI_INDEX. Text was:\n" + r_index.text
    # print base64.b64decode(r.text)
    # print r_index.text
    bleats_0 = json.loads(r_index.text)
    row_key_name_list = list()
    for row in bleats_0['Row']:
        row_key = base64.b64decode(row['key'])
        row_key_name = row_key.split("-")[3]
        if row_key_name not in row_key_name_list:
            row_key_name_list.append(row_key_name)
    print len(row_key_name_list)
    # **********new by Ivy**********************************************
    emProcess = emotionProcess()
    rmIrr = removeIrrelevant()
    sqlcursor.execute(
        "SELECT program_name from ini_app_program_source_rel where program_name = '跨界喜剧王' limit 1;"
    )
    bufferTemp = sqlcursor.fetchall()
    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    for one_program in bufferTemp:
        commentsData = []
        tempData = []
        play_count = 0
        comment_count = 0
        play_count_box = list()
        comment_count_box = list()
        one_program = one_program[0].encode('utf8')
        if one_program == '中国好歌曲第三季':
            otherStyleTime = '2015-10-26'
        tablename = "DATA:VIDEO_ZONGYI"
        print one_program
        sqlcursor.execute(
            '''SELECT program_id from ini_app_program_source_rel where program_name = %s limit 1;''',
            (one_program, ))
        bufferTemp = sqlcursor.fetchone()
        program_id = bufferTemp[0]
        # **********new by Ivy**********************************************
        for single_rowkey in row_key_name_list:
            r = requests.get(baseurl + "/" + tablename + "/" + single_rowkey +
                             "*",
                             auth=kerberos_auth,
                             headers={"Accept": "application/json"})
            if issuccessful(r) == False:
                print "Could not get messages from HBase. Text was:\n" + r.text
            # print r.text
            try:
                bleats = json.loads(r.text)
            except:
                pass
        # **********new by Ivy**********************************************
        # bleats is json file
            vedio_name = ""
            for row in bleats['Row']:
                flag = True
                for cell in row['Cell']:
                    columnname = base64.b64decode(cell['column'])
                    value = cell['$']
                    if value == None:
                        print 'none'
                        continue
                    if columnname == "base_info:column_name":
                        column = base64.b64decode(value)
                        if (one_program not in column):
                            flag = False
                            break
                    if columnname == "base_info:play_count":
                        play_count = base64.b64decode(value)
                        play_count = int(play_count)
                    if columnname == "base_info:comment_count":
                        comment_count = base64.b64decode(value)
                        comment_count = int(comment_count)
                    if columnname == "base_info:name":
                        vedio_name = base64.b64decode(value)
                if flag:
                    play_count_box.append(play_count)
                    comment_count_box.append(comment_count)
        # 播放指数
        total_play_count = sum(play_count_box)
        total_comment_count = sum(comment_count_box)
        total_index = total_play_count / 10000000 * 2 + total_comment_count / 100
        print "total_index", total_index
        tempData.append(total_index)
        # 播放次数
        tempData.append(total_play_count)
        # 评论次数
        tempData.append(total_comment_count)
        #     日期时间
        otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
        tempData.append(otherStyleTime)
        # 栏目id
        tempData.append(program_id)
        # 栏目名称
        tempData.append(one_program)
        #     转换为元组
        sqlcursor.execute(
            '''insert into vedio_index(total_index, play_count, comment_count, date, program_id, program)
                    values (%s, %s, %s, %s, %s, %s)''', tempData)
        sqlConn.commit()
        tempData = []
    sqlConn.close()
Пример #6
0
def addcustomerEvaluation_informal(baseurl,mysqlhostIP, mysqlUserName = '******', mysqlPassword = '', dbname = 'btv_v2'):
    path = os.path.abspath(os.path.dirname(sys.argv[0]))
    dicFile = open(path+'/tools/NTUSD_simplified/stopwords.txt','r')
    stopwords = dicFile.readlines()
    stopwordList = []
    stopwordList.append(' ')
    for stopword in stopwords:
        temp = stopword.strip().replace('\r\n','').decode('utf8')
        stopwordList.append(temp)
    dicFile.close() 
    sqlConn=MySQLdb.connect(host=mysqlhostIP, user=mysqlUserName, passwd=mysqlPassword, db = dbname, charset='utf8')
    sqlcursor = sqlConn.cursor()
    sqlcursor.execute('''CREATE TABLE IF NOT EXISTS media_evaluation(pk bigint NOT NULL PRIMARY KEY AUTO_INCREMENT, flag int(1), evaluation bigint(20), content varchar(200), date Date,
                    program_id varchar(200), program varchar(200)) DEFAULT CHARSET=utf8;''')
    print '新建库成功'
    inter = 2
    now = int(time.time())-86400*inter
    timeArray = time.localtime(now)
    otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    print otherStyleTime
    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    emProcess = emotionProcess()
    rmIrr = removeIrrelevant()
    sqlcursor.execute("SELECT DISTINCT(program_name) from ini_app_program_source_rel")
    bufferTemp = sqlcursor.fetchall()
    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    tibeid_name = dict()
    tiba_id_box = list()
    for one_program in bufferTemp:
        commentsData = []
        tempData = []
        one_program = one_program[0].encode('utf8')
        # print one_program
        emotionsWord = []
        emotionsScore = 0
        count = 0
        printCount = 0
        tablename = "DATA:TIEBA_SITE"
        prefix_0 = otherStyleTime + "*"
        r = requests.get(baseurl + "/" + tablename + "/" + prefix_0,  auth=kerberos_auth, headers = {"Accept" : "application/json"})
        if issuccessful(r) == False:
            print "Could not get messages from HBase. Text was:\n" + r.text
        bleats = json.loads(r.text)
        for row in bleats['Row']:
            flag = True
            row_key = base64.b64decode(row['key'])
            tiba_id = row_key.split("-")[3]
            for cell in row['Cell']:
                columnname = base64.b64decode(cell['column'])
                value = cell['$']
                if value == None:
                    print 'none'
                    continue
                if columnname == "base_info:name":
                    tieba_name = base64.b64decode(value)
                    if (one_program  not in tieba_name):
                        flag = False
                        break
            if flag:
                tibeid_name[tiba_id] = tieba_name
                if tiba_id not in tiba_id_box:
                    tiba_id_box.append(tiba_id_box)
    print "len(tiba_id_box)", len(tiba_id_box)
    tablename = "DATA:TIEBA_SITE_Reply"
    for one_tieba_id in tiba_id_box:
        for i,j in tibeid_name.iteritems():
            if one_tieba_id == i:
                one_program = j
        prefix_1 = otherStyleTime + "-" + str(i) + "*"
        r = requests.get(baseurl + "/" + tablename + "/" + prefix_1,  auth=kerberos_auth, headers = {"Accept" : "application/json"})
        if issuccessful(r) == False:
            print "Could not get messages from HBase. Text was:\n" + r.text
        bleats = json.loads(r.text)
        sqlcursor.execute('''SELECT program_id from ini_app_program_source_rel where program_name = %s limit 1;''', (one_program,))
        bufferTemp = sqlcursor.fetchone()
        program_id = bufferTemp[0]
        for row in bleats['Row']:
            flag = True
            for cell in row['Cell']:
                columnname = base64.b64decode(cell['column'])
                value = cell['$']
                if value == None:
                    print 'none'
                    continue
                if columnname == "base_info:cdate":
                    cdate = base64.b64decode(value)
                    cdate = cdate.split('T')[0]
                if columnname == "base_info:content":
                    content = base64.b64decode(value)
            if flag:
                (emotionsWord,emotionsScore) = emProcess.processSentence(rmIrr.removeEverythingButEmotion(content))
                if emotionsScore>0:
                    tempData.append('1')
                elif emotionsScore==0:
                    tempData.append('0')
                elif emotionsScore<0:
                    tempData.append('-1')
        #         情感得分sentimentScore
                tempData.append(emotionsScore)
                # 评论内容
                tempData.append(content)
            #     日期时间
                otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
                tempData.append(otherStyleTime)
                # 栏目id
                tempData.append(program_id)
                # 栏目名称
                tempData.append(one_program)
            #     转换为元组

                sqlcursor.execute('''insert into media_evaluation(flag, evaluation, content, date, program_id, program)
                            values (%s, %s, %s, %s, %s, %s)''',tempData)
                sqlConn.commit()
                tempData = []
    sqlConn.close()
Пример #7
0
def addcustomerEvaluation_informal(hbaseIP,
                                   mysqlhostIP,
                                   mysqlUserName='******',
                                   mysqlPassword='',
                                   dbname='btv'):
    # 读停用词
    path = os.path.abspath(os.path.dirname(sys.argv[0]))
    dicFile = open(path + '/tools/NTUSD_simplified/stopwords.txt', 'r')
    stopwords = dicFile.readlines()
    stopwordList = []
    stopwordList.append(' ')
    for stopword in stopwords:
        temp = stopword.strip().replace('\r\n', '').decode('utf8')
        stopwordList.append(temp)
    dicFile.close()

    # 分词
    jieba.initialize()
    source_1 = 'weibo'
    # 连接数据库
    sqlConn = MySQLdb.connect(host=mysqlhostIP,
                              user=mysqlUserName,
                              passwd=mysqlPassword,
                              db=dbname,
                              charset='utf8')
    sqlcursor = sqlConn.cursor()

    sqlcursor.execute(
        '''CREATE TABLE IF NOT EXISTS media_evaluation(pk bigint NOT NULL PRIMARY KEY AUTO_INCREMENT, flag int(1), evaluation bigint(20), content varchar(200), date Date,
                    program_id varchar(200), program varchar(200)) DEFAULT CHARSET=utf8;'''
    )
    print '新建库成功'
    # 时间属性
    # inter为0,即为当日
    # 库中是2.29
    inter = 37
    now = int(time.time()) - 86400 * inter
    timeArray = time.localtime(now)
    otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    print otherStyleTime
    # 连接hbase数据库
    conn = happybase.Connection(hbaseIP)
    conn.open()
    # 存储评论数据

    # 处理情感
    emProcess = emotionProcess()
    rmIrr = removeIrrelevant()
    # 首先获取栏目,注意栏目及相关hbase存储信息有更新,请在hbase_info做同步
    # print 'SELECT DISTINCT(program) from hbase_info where source = %s' %source_1
    sqlcursor.execute(
        "SELECT DISTINCT(program) from hbase_info where source = 'buzz';")
    bufferTemp = sqlcursor.fetchall()
    # print len(bufferTemp)
    for one_program in bufferTemp:
        commentsData = []
        tempData = []
        one_program = one_program[0].encode('utf8')
        print type(one_program), one_program
        sqlcursor.execute(
            '''SELECT hbase_table from hbase_info where program = %s and source = %s;''',
            (one_program, source_1))
        bufferTemp = sqlcursor.fetchone()
        program_hbase_table = bufferTemp[0]
        print program_hbase_table
        # 以“JQJM”为关键词的微博原贴代替,按理应该是底下的评论
        table = conn.table(str(program_hbase_table))
        # customerEvaluation_informal需要有program标识
        emotionsWord = []
        emotionsScore = 0
        count = 0
        printCount = 0
        sqlcursor.execute(
            '''SELECT program_id from competition_analysis where program = %s''',
            (one_program, ))
        bufferTemp = sqlcursor.fetchone()
        program_id = bufferTemp[0]
        print program_id
        # row_prefix, limit可以限定次数
        for key, data in table.scan(limit=10, batch_size=10):
            # print 'hhh',key,data
            # for key,data in table.scan(row_prefix = 'row', limit = 10, batch_size = 10):
            date_created = data['base_info:cdate']
            if date_created == otherStyleTime:
                content = data['base_info:text']
                print 'q', content
                # 暂时没有program_id
                # program_id = data['base_info:program_id']

                count += 1
                printCount += 1
                # 处理每一条
                #     情感关键词
                (emotionsWord, emotionsScore) = emProcess.processSentence(
                    rmIrr.removeEverythingButEmotion(content))
                #     倾向性判断flag:1是正面,0是中性,-1是负面

                #     情感极性判断,这里我限制了更严格的条件
                if emotionsScore > 0:
                    tempData.append('1')
                elif emotionsScore == 0:
                    tempData.append('0')
                elif emotionsScore < 0:
                    tempData.append('-1')
        #         情感得分sentimentScore
                tempData.append(emotionsScore)
                # 评论内容
                tempData.append(content)
                #     日期时间
                tempData.append(otherStyleTime)
                # 栏目id
                tempData.append(program_id)
                # 栏目名称
                tempData.append(one_program)
                #     转换为元组
                commentsData.append(tuple(tempData))
                tempData = []
                if count >= 10:
                    sqlcursor.executemany(
                        '''insert into media_evaluation(flag, evaluation, content, date, program_id, program)
                                values (%s, %s, %s, %s, %s, %s)''',
                        commentsData)
                    sqlConn.commit()
                    commentsData = []
                    count = 0
                    print '插入' + str(printCount) + '个'
        # # except:
        # #      print tempData
        sqlcursor.executemany(
            '''insert into media_evaluation(flag, evaluation, content, date, program_id, program)
                            values (%s, %s, %s, %s, %s, %s)''', commentsData)
        sqlConn.commit()
    sqlConn.close()
Пример #8
0
def addcompetitiveProgram(baseurl,mysqlhostIP, mysqlUserName = '******', mysqlPassword = '', dbname = 'btv'):

    # 连接数据库
    sqlConn=MySQLdb.connect(host=mysqlhostIP, user=mysqlUserName, passwd=mysqlPassword, db = dbname, charset='utf8')
    sqlcursor = sqlConn.cursor()

    sqlcursor.execute('''CREATE TABLE IF NOT EXISTS competition_analysis(pk bigint NOT NULL PRIMARY KEY AUTO_INCREMENT, program varchar(50), host varchar(50), time varchar(50), channel varchar(50),
                    introduction varchar(300), program_id varchar(50), image varchar(200), url varchar(200), length varchar(50), program_type_id varchar(2)) DEFAULT CHARSET=utf8;''')
    print '新建库成功'

    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    # 表名
    tablename = "DATA:WEIBO_POST_chunwan"
    r = requests.get(baseurl + "/" + tablename + "/*",  auth=kerberos_auth, headers = {"Accept" : "application/json"})
    if issuccessful(r) == False:
        print "Could not get messages from HBase. Text was:\n" + r.text
    quit()
    bleats = json.loads(r.text)


    # 连接hbase数据库
    # 存储评论数据
    commentsData = []
    tempData = []
    # 处理情感
    emProcess = emotionProcess()
    rmIrr = removeIrrelevant()
    # table=conn.table('competitiveProgram')
    # customerEvaluation_informal需要有program标识
    count = 0
    printCount = 0
    # 时间属性
    # inter为0,即为当日
    inter = 0
    now = int(time.time())-86400*inter
    timeArray = time.localtime(now)
    otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    print otherStyleTime
    # row_prefix, limit可以限定次数
    # bleats is json file
    for row in bleats['Row']:
        for cell in row['Cell']:
            columnname = base64.b64decode(cell['column'])
            value = cell['$']
            if value == None:
                print 'none'
                continue
                # 播出频道
            if columnname == "'base_info:channel'":
                channel = base64.b64decode(value)
            # 因为是基本信息的抓取,不需要date
            # 播出时间
            if columnname == "'base_info:Broadcast_time'":
                Broadcast_time = base64.b64decode(value)
            # 每集长度
            if columnname == "'base_info:period'":
                period = base64.b64decode(value)
            # 主持人
            if columnname == "'base_info:host'":
                host = base64.b64decode(value)
            # 简介
            if columnname == "'base_info:Info'":
                Info = base64.b64decode(value)
            # 竞争栏目id
            if columnname == "'base_info:program_id'":
                program_id = base64.b64decode(value)
            # 竞争栏目名称
            if columnname == "'base_info:program'":
                program = base64.b64decode(value)
            # url
            if columnname == "'base_info:url'":
                url = base64.b64decode(value)
            count += 1
            printCount+=1
            # 处理每一条
            tempData.append(program)
            tempData.append(host)
            tempData.append(Broadcast_time)
            tempData.append(channel)
            tempData.append(Info)
            tempData.append(program_id)
            # image
            tempData.append('')
            tempData.append(url)
            tempData.append(period)
            # program_type
            tempData.append('1')
        #     转换为元组
            commentsData.append(tuple(tempData))
            tempData = []
            sqlcursor.executemany('''insert into competition_analysis(program, host, time, channel, introduction, program_id, image, url, length, program_type_id)
                        values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''',commentsData)
            sqlConn.commit()
            commentsData = []
    sqlConn.close()
Пример #9
0
def addTVSeriesTable(mongodbIP,mysqlhostIP,timeRangeBeginning, mysqlUserName = '******', mysqlPassword = '******', dbname = 'cctvTimer'):
    
    rmIrr = removeIrrelevant()
    emProcess = emotionProcess()
    # 数据库mysql
    sqlConn=MySQLdb.connect(host = mysqlhostIP, user=mysqlUserName, passwd=mysqlPassword,db = dbname,charset='utf8')
    sqlcursor = sqlConn.cursor()
    
    # sqlcursor.execute('''DROP TABLE IF EXISTs TVSeriesTable;''')
    # print '删库成功'
    sqlcursor.execute('''CREATE TABLE IF NOT EXISTS TVSeriesTable(countIndex bigint(64) primary key, weiboId bigint(64),
                        TvSeriesName varchar(64), userId bigint(64), weiboContent varchar(1024), contentWords varchar(1024),
                        weiboKeywords varchar(1024), sentiment varchar(64), sentimentScore int(16), sentimentKeywords varchar(1024), 
                        createdTime varchar(128)) DEFAULT CHARSET=utf8;''')   
    print '新建库成功'
    
    # mongoDB数据库
    mongoConn = pymongo.Connection(host = mongodbIP, port = 27017)
    # 查询某条微博的index
    mongoCollection = mongoConn.weibo_db_cctv.weibo
    mongoCursor = mongoCollection.find().batch_size(30).skip(2495500)
    
# 读停用词
    path = os.path.abspath(os.path.dirname(sys.argv[0]))
    dicFile = open(path+'/tools/NTUSD_simplified/stopwords.txt','r')
    stopwords = dicFile.readlines()
    stopwordList = []
    stopwordList.append(' ')
    for stopword in stopwords:
        temp = stopword.strip().replace('\r\n','').decode('utf8')
        stopwordList.append(temp)
    dicFile.close() 
    
    # 存储数据
    tvseriesData = []
    tempData = []
    # 计数
    sqlcursor.execute('select count(*) from TVSeriesTable;')
    totalCount = sqlcursor.fetchall()
    totalCount = list(list(totalCount)[0])[0]
    
    count = 0
    printCount = totalCount
    
    for tvWeibo in mongoCursor:
        count+=1
        printCount+=1
    #     countIndex
        tempData.append(printCount)
    #     weiboId
        tempData.append(tvWeibo['wid'])
    #     TvSeriesName
        tempData.append(tvWeibo['keyword'])
    #     userId
        tempData.append(tvWeibo['uid'])
    #     weiboContent
        tempData.append(tvWeibo['content'])
    #     contentWords
        tempcut_out = jieba.cut(rmIrr.removeEverything(tvWeibo['content']))
        cut_out = []
        for i in tempcut_out:
            if i not in stopwordList:
                cut_out.append(i)
        tempData.append(','.join(cut_out))
    #     weiboKeywords
        temp = jieba.analyse.extract_tags(rmIrr.removeEverything(tvWeibo['content']),5)
        weiboKeywords = ','.join(temp)
        tempData.append(weiboKeywords)
    #     sentiment
        (emotionsWord,emotionsScore) = emProcess.processSentence(rmIrr.removeEverythingButEmotion(tvWeibo['content']))
        if emotionsScore>0:
            tempData.append('正面')
        elif emotionsScore==0:
            tempData.append('中立')
        else:
            tempData.append('负面')
#         sentimentScore   
        tempData.append(emotionsScore)    
    #     sentimentKeywords    
        emotionsWord = ','.join(emotionsWord)
        tempData.append(emotionsWord)
    #     createdTime
        createdTime = time.ctime(tvWeibo['time'])
        tempData.append(createdTime)
    #     执行插入
        tvseriesData.append(tuple(tempData))
        tempData = []
        if count>=100:
            sqlcursor.executemany('''insert into TVSeriesTable(countIndex, weiboId, TvSeriesName, userId, weiboContent, contentWords,
                        weiboKeywords, sentiment, sentimentScore, sentimentKeywords, createdTime) 
                        values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''',tvseriesData)
            sqlConn.commit() 
            tvseriesData = []
            count = 0
            print '插入'+str(printCount)+'个'
    sqlcursor.executemany('''insert into TVSeriesTable(countIndex, weiboId, TvSeriesName, userId, weiboContent, contentWords,
                    weiboKeywords, sentiment, sentimentScore, sentimentKeywords, createdTime) 
                    values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''',tvseriesData)
    sqlConn.commit() 
    mongoConn.close()
    sqlConn.close()
Пример #10
0
def addCommentTable(mongodbIP,
                    mysqlhostIP,
                    mysqlUserName='******',
                    mysqlPassword='******',
                    dbname='cctvTimer'):

    # 读停用词
    path = os.path.abspath(os.path.dirname(sys.argv[0]))
    dicFile = open(path + '/tools/NTUSD_simplified/stopwords.txt', 'r')
    stopwords = dicFile.readlines()
    stopwordList = []
    stopwordList.append(' ')
    for stopword in stopwords:
        temp = stopword.strip().replace('\r\n', '').decode('utf8')
        stopwordList.append(temp)
    dicFile.close()

    # 分词
    jieba.initialize()

    # 连接数据库
    sqlConn = MySQLdb.connect(host=mysqlhostIP,
                              user=mysqlUserName,
                              passwd=mysqlPassword,
                              db=dbname,
                              charset='utf8')
    sqlcursor = sqlConn.cursor()
    # 删库
    # sqlcursor.execute('''DROP TABLE IF EXISTs commentTable;''')
    # print '删库成功'

    sqlcursor.execute(
        '''CREATE TABLE IF NOT EXISTS commentTable(countIndex bigint(64) primary key, commentId bigint(64), weiboId bigint(64), userId bigint(64), comment varchar(1024), 
                    sentimentKeywords varchar(128), contentKeywords varchar(1024), sentiment varchar(16), sentimentScore int(16), userName varchar(64), userSex varchar(16),
                    userLocation varchar(64), userFollowerCount int(64), userFriendCount int(64), userStatusCount int(64), userType varchar(32), 
                    spammerJudge varchar(16), replyTime varchar(128)) DEFAULT CHARSET=utf8;'''
    )
    print '新建库成功'

    # 连接mongoDB数据库
    mongoConn = pymongo.Connection(host=mongodbIP, port=27017)
    # check time
    mongoCursor = mongoConn.weibo.timestamp.find({'type': 'comment'}).limit(1)
    timeRangeBeginning = datetime.datetime.now() - datetime.timedelta(
        days=9999)
    #     print timeRangeBeginning
    #     a=dict()
    #     a['type']='comment'
    #     a['time']=timeRangeBeginning
    #     mongoConn.weibo.timestamp.insert(a)
    #     a=dict()
    #     a['type']='repost'
    #     a['time']=timeRangeBeginning
    #     mongoConn.weibo.timestamp.insert(a)
    #     a=dict()
    #     a['type']='weibo'
    #     a['time']=timeRangeBeginning
    #     mongoConn.weibo.timestamp.insert(a)
    for i in mongoCursor:
        timeRangeBeginning = i['time']
    newTimestamp = timeRangeBeginning
    # 查询某条微博的回复
    mongoCursor = mongoConn.weibo.comment.find({
        'task_time': {
            '$gt': timeRangeBeginning
        }
    }).sort('task_time').batch_size(30)
    print '查询mongoDB成功'
    # 计数
    sqlcursor.execute('select count(*) from commentTable;')
    totalCount = sqlcursor.fetchall()
    totalCount = list(list(totalCount)[0])[0]

    # 存储评论数据
    commentsData = []
    tempData = []
    # 处理情感
    emProcess = emotionProcess()
    rmIrr = removeIrrelevant()
    spamDet = spammerdetect()

    emotionsWord = []
    emotionsScore = 0
    count = 0
    printCount = totalCount
    # 处理每一条
    # try:
    for comment in mongoCursor:
        #        if comment['task_time']>timeRangeBeginning+ datetime.timedelta(days=1):
        #            continue
        count += 1
        printCount += 1
        tempData.append(printCount)
        #     评论id
        tempData.append(comment['comment_id'])
        #     微博id
        tempData.append(comment['weibo_id'])
        #     用户id
        tempData.append(comment['comment_user_id'])
        #     评论内容
        tempData.append(comment['comment_text'])
        #     情感关键词
        (emotionsWord, emotionsScore) = emProcess.processSentence(
            rmIrr.removeEverythingButEmotion(comment['comment_text']))
        emotionsWord = ','.join(emotionsWord)
        tempData.append(emotionsWord)
        #     print comment['mid']
        #     print comment['status']['mid']
        #     print comment['user']['id']
        #     print comment['text']
        # 内容分词
        tempcut_out = jieba.cut(rmIrr.removeEverything(
            comment['comment_text']))
        cut_out = []
        for i in tempcut_out:
            if i not in stopwordList:
                cut_out.append(i)
        tempData.append(','.join(cut_out))
        #     倾向性判断
        if emotionsScore > 0:
            tempData.append('正面')
        elif emotionsScore == 0:
            tempData.append('中立')
        else:
            tempData.append('负面')


#         sentimentScore
        tempData.append(emotionsScore)
        #     用户昵称
        tempData.append(comment['comment_user_name'])
        #     用户性别
        tempData.append(comment['comment_gender'])
        #     用户地域信息
        tempData.append(comment['comment_location'])
        #     用户粉丝数
        tempData.append(comment['comment_followers_count'])
        #     用户关注数
        tempData.append(comment['comment_friends_count'])
        #     用户微博数
        tempData.append(comment['comment_statuses_count'])
        #     用户类型
        if (comment['comment_verified_type'] == -1):
            tempData.append('普通用户')
        elif (comment['comment_verified_type']
              == 220) or (comment['comment_verified_type'] == 200):
            tempData.append('微博达人')
        elif (comment['comment_verified_type'] == 0):
            tempData.append('个人认证')
        else:
            tempData.append('企业认证')
    # 是否水军
        userInfo = {}
        userInfo['statuses_count'] = comment['comment_statuses_count']
        userInfo['followers_count'] = comment['comment_followers_count']
        userInfo['friends_count'] = comment['comment_friends_count']
        userInfo['bi_followers_count'] = comment['comment_bi_followers_count']
        userInfo['domain'] = comment['comment_user_domain']
        userInfo['url'] = comment['comment_url']
        userInfo['description'] = comment['comment_description']
        userInfo['location'] = comment['comment_location']
        userInfo['verified'] = comment['comment_verified']
        userInfo['verified_type'] = comment['comment_verified_type']

        newTimestamp = comment['task_time']

        spamScore = spamDet.detectSpammer(userInfo)
        if spamScore > 0:
            tempData.append("正常")
        else:
            tempData.append("水军")
    #     回复时间
        hh = time.strptime(str(comment['comment_created_at']),
                           '%Y-%m-%d %H:%M:%S')
        commentTime = time.strftime("%a %b %d %H:%M:%S %Y", hh)

        tempData.append(commentTime)
        #     转换为元组
        commentsData.append(tuple(tempData))
        tempData = []
        if count >= 10:
            sqlcursor.executemany(
                '''insert into commentTable(countIndex, commentId, weiboId, userId, comment, sentimentKeywords, contentKeywords, sentiment, sentimentScore, userName, 
                        userSex,userLocation, userFollowerCount, userFriendCount, userStatusCount, userType,spammerJudge, replyTime) 
                        values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''',
                commentsData)
            sqlConn.commit()
            commentsData = []
            count = 0
            print '插入' + str(printCount) + '个'
    # # except:
    # #      print tempData
    sqlcursor.executemany(
        '''insert into commentTable(countIndex, commentId, weiboId, userId, comment, sentimentKeywords, contentKeywords, sentiment, sentimentScore, userName, userSex,
                        userLocation, userFollowerCount, userFriendCount, userStatusCount, userType,spammerJudge, replyTime) 
                        values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''',
        commentsData)
    sqlConn.commit()
    sqlConn.close()
    mongoConn.weibo.timestamp.update({'type': 'comment'},
                                     {'$set': {
                                         'time': newTimestamp
                                     }})
    mongoConn.close()
Пример #11
0
def mentioned_trend(baseurl,
                    mysqlhostIP,
                    mysqlUserName='******',
                    mysqlPassword='',
                    dbname='btv_v2'):
    # 连接数据库
    sqlConn = MySQLdb.connect(host=mysqlhostIP,
                              user=mysqlUserName,
                              passwd=mysqlPassword,
                              db=dbname,
                              charset='utf8')
    sqlcursor = sqlConn.cursor()
    sqlcursor.execute(
        '''CREATE TABLE IF NOT EXISTS gala_likability_mentioned(pk bigint NOT NULL PRIMARY KEY AUTO_INCREMENT, channel varchar(20), mentioned bigint(50), likability bigint(50), date date, program_id varchar(50), program varchar(50)) DEFAULT CHARSET=utf8;'''
    )
    print '新建库成功'

    # 连接hbase数据库
    os.popen('kinit -k -t ctvit.keytab ctvit')
    kerberos_auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
    tablename = "DATA:WEIBO_POST_Keywords"
    r = requests.get(baseurl + "/" + tablename + "/*",
                     auth=kerberos_auth,
                     headers={"Accept": "application/json"})
    if issuccessful(r) == False:
        print "Could not get messages from HBase. Text was:\n" + r.text
    # quit()
    bleats = json.loads(r.text)
    # 北京卫视 2.8
    # 时间属性
    # inter为0,即为当日
    # inter = 91
    # now = int(time.time())-86400*inter
    # timeArray = time.localtime(now)
    # otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    # print otherStyleTime
    #
    # # 存储评论数据
    # sqlcursor.execute('''SELECT mentioned from gala_mentioned_trend where date = %s and program = '2016年北京卫视春节联欢晚会';''',(otherStyleTime,))
    # bufferTemp = sqlcursor.fetchone()
    # chunwan_times = bufferTemp[0]
    # print chunwan_times
    # emProcess = emotionProcess()
    # rmIrr = removeIrrelevant()
    # tempData = []
    # sum_emotionsScore = 0
    # # bleats is json file
    # for row in bleats['Row']:
    #     flag = True
    #     for cell in row['Cell']:
    #         columnname = base64.b64decode(cell['column'])
    #         value = cell['$']
    #         if value == None:
    #             print 'none'
    #             continue
    #         if columnname == "base_info:match":
    #             column = base64.b64decode(value)
    #             if ("北京卫视春晚"  not in column) and ("北京台的春晚" not in column) and ("BTV春晚" not in column) and ("BTV春晚" not in column) and ("bTV春晚" not in column):
    #                 flag = False
    #                 break
    #         if columnname == "base_info:cdate":
    #             cdate = base64.b64decode(value)
    #             cdate = cdate.split('T')[0]
    #             if cdate != otherStyleTime:
    #                 flag = False
    #                 break
    #         if columnname == "base_info:text":
    #             content = base64.b64decode(value)
    #             # print 'q',content
    #     #     情感关键词
    #     if flag:
    #         (emotionsWord,emotionsScore) = emProcess.processSentence(rmIrr.removeEverythingButEmotion(content))
    #     #     倾向性判断flag:1是正面,0是中性,-1是负面
    #     #     情感极性判断,这里我限制了更严格的条件
    #         # channel
    #         # if emotionsScore > 0:
    #         sum_emotionsScore += emotionsScore
    # tempData.append('北京卫视')
    # # mentioned
    # tempData.append(chunwan_times)
    # # likability
    # tempData.append(sum_emotionsScore)
    # # date
    # tempData.append(otherStyleTime)
    # # program_id
    # tempData.append('100')
    # # program
    # tempData.append('2016年北京卫视春节联欢晚会')
    # sqlcursor.execute('''insert into gala_likability_mentioned(channel, mentioned, likability, date, program_id, program) values (%s, %s, %s, %s, %s, %s)''',tempData)
    # sqlConn.commit()
    # tempData = []

    # 湖南卫视 2.2
    # inter为0,即为当日
    inter_hn = 97
    now = int(time.time()) - 86400 * inter_hn
    timeArray = time.localtime(now)
    otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    print 'bb', otherStyleTime

    # 存储评论数据
    sqlcursor.execute(
        '''SELECT mentioned from gala_mentioned_trend where date = %s and program = '2016湖南卫视小年夜春晚';''',
        (otherStyleTime, ))
    bufferTemp = sqlcursor.fetchone()
    chunwan_times = bufferTemp[0]
    print chunwan_times
    emProcess = emotionProcess()
    rmIrr = removeIrrelevant()
    tempData = []
    sum_emotionsScore = 0
    # bleats is json file
    for row in bleats['Row']:
        flag = True
        for cell in row['Cell']:
            columnname = base64.b64decode(cell['column'])
            value = cell['$']
            if value == None:
                print 'none'
                continue
            if columnname == "base_info:match":
                column = base64.b64decode(value)
                if ("湖南" not in column) and ("芒果" not in column):
                    flag = False
                    break
            if columnname == "base_info:cdate":
                cdate = base64.b64decode(value)
                cdate = cdate.split('T')[0]
                if cdate != otherStyleTime:
                    flag = False
                    break
            if columnname == "base_info:text":
                content = base64.b64decode(value)
                # print 'q',content
        #     情感关键词
        if flag:
            (emotionsWord, emotionsScore) = emProcess.processSentence(
                rmIrr.removeEverythingButEmotion(content))
            #     倾向性判断flag:1是正面,0是中性,-1是负面
            #     情感极性判断,这里我限制了更严格的条件
            # channel
            # if emotionsScore > 0:
            sum_emotionsScore += emotionsScore
    tempData.append('湖南卫视')
    # mentioned
    tempData.append(chunwan_times)
    # likability
    tempData.append(sum_emotionsScore)
    # date
    tempData.append(otherStyleTime)
    # program_id
    tempData.append('100')
    # program
    tempData.append('2016湖南卫视小年夜春晚')
    sqlcursor.execute(
        '''insert into gala_likability_mentioned(channel, mentioned, likability, date, program_id, program) values (%s, %s, %s, %s, %s, %s)''',
        tempData)
    sqlConn.commit()
    tempData = []

    # 辽宁卫视2.6
    # inter_ln = 93
    # now = int(time.time())-86400*inter_ln
    # timeArray = time.localtime(now)
    # otherStyleTime = time.strftime("%Y-%m-%d", timeArray)
    # print otherStyleTime
    #
    # # 存储评论数据
    # sqlcursor.execute('''SELECT mentioned from gala_mentioned_trend where date = %s and program = '2016年辽宁卫视春晚';''',(otherStyleTime,))
    # bufferTemp = sqlcursor.fetchone()
    # chunwan_times = bufferTemp[0]
    # print chunwan_times
    # emProcess = emotionProcess()
    # rmIrr = removeIrrelevant()
    # tempData = []
    # sum_emotionsScore = 0
    # # bleats is json file
    # for row in bleats['Row']:
    #     flag = True
    #     for cell in row['Cell']:
    #         columnname = base64.b64decode(cell['column'])
    #         value = cell['$']
    #         if value == None:
    #             print 'none'
    #             continue
    #         if columnname == "base_info:match":
    #             column = base64.b64decode(value)
    #             if ("辽宁"  not in column):
    #                 flag = False
    #                 break
    #         if columnname == "base_info:cdate":
    #             cdate = base64.b64decode(value)
    #             cdate = cdate.split('T')[0]
    #             if cdate != otherStyleTime:
    #                 flag = False
    #                 break
    #         if columnname == "base_info:text":
    #             content = base64.b64decode(value)
    #             # print 'q',content
    #     #     情感关键词
    #     if flag:
    #         (emotionsWord,emotionsScore) = emProcess.processSentence(rmIrr.removeEverythingButEmotion(content))
    #     #     倾向性判断flag:1是正面,0是中性,-1是负面
    #     #     情感极性判断,这里我限制了更严格的条件
    #         # channel
    #         # if emotionsScore > 0:
    #         sum_emotionsScore += emotionsScore
    # tempData.append('辽宁卫视')
    # # mentioned
    # tempData.append(chunwan_times)
    # # likability
    # tempData.append(sum_emotionsScore)
    # # date
    # tempData.append(otherStyleTime)
    # # program_id
    # tempData.append('100')
    # # program
    # tempData.append('2016年辽宁卫视春晚')
    # sqlcursor.execute('''insert into gala_likability_mentioned(channel, mentioned, likability, date, program_id, program) values (%s, %s, %s, %s, %s, %s)''',tempData)
    # sqlConn.commit()
    # tempData = []
    sqlConn.close()