Пример #1
0
def GetOthersData(chatroom):
    '''
    将单个单聊或公众号的消息导入到数据库
    '''
    pattern1 = re.compile('"(.*?)"')
    well_tempered_data = []
    with sqlInit.SqliteInit() as sqlite_cur:
        result = sqlite_cur.execute(
            "SELECT Type, CreateTime, Message, Des from " + chatroom)
        for row in result:
            Type = row[0]
            CreateTime = row[1]
            Des = row[3]
            if (Type == 10000) or (Type == 10002):
                if ("撤回" in row[2]) or ("recalled a message" in row[2]):
                    Message = "撤回消息"
                    if not (len(pattern1.findall(row[2])) > 0):
                        Des = 0
                else:
                    Message = row[2].replace("\n", "")
            else:
                Message = row[2].replace("\n", "")
            well_tempered_data.append([Type, CreateTime, Message, Des])
    sql = "INSERT INTO " + chatroom + "(Type,CreateTime,Message,Des) VALUES(%s,%s,%s,%s)"
    with sqlInit.MysqlInit() as mysql_cur:
        for line in well_tempered_data:
            mysql_cur.execute(sql, line)
Пример #2
0
def GetRowNum(chatroom, db="sqlite", Des=2):
    '''
    返回表的条数
    '''
    rowNum = []
    if db == "mysql":
        with sqlInit.MysqlInit() as mysql_cur:
            if Des == 2:
                sql = "select count(*) from " + chatroom
            else:
                sql = "select count(*) from " + chatroom + " where Des=" + str(
                    Des)
            mysql_cur.execute(sql)
            fetchResult = mysql_cur.fetchall()
            for row in fetchResult:
                rowNum = row[0]
    else:
        with sqlInit.SqliteInit() as sqlite_cur:
            if Des == 2:
                sql = "select count(*) from " + chatroom
            else:
                sql = "select count(*) from " + chatroom + " where Des=" + str(
                    Des)
            fetchResult = sqlite_cur.execute(sql)
            for row in fetchResult:
                rowNum = row[0]
    return int(rowNum)
Пример #3
0
def InsertFriends(chatroom, Type):
    '''
    将联系人信息导入到数据库
    '''
    sql = "INSERT INTO Friends(Type,EncodeUserName,UserName) VALUES(%s,%s,%s)"
    with sqlInit.MysqlInit() as mysql_cur:
        mysql_cur.execute(sql, [Type, chatroom, GetWXID(chatroom)])
Пример #4
0
def CreateTable(chatroom="", type=4):
    '''
    创建表
    type:1:群聊,4:联系人表,5:地理信息表,其它:单聊或公众号
    '''
    sql_group = "create table " + chatroom + """(
                    id             int(11)      unsigned   NOT NULL  AUTO_INCREMENT,
                    Type           int(4)       unsigned   NOT NULL,
                    CreateTime     int(11)      unsigned   NOT NULL,
                    SentFrom       TEXT,
                    Message        LONGTEXT,
                    Des            tinyint(1)              NOT NULL,
                    PRIMARY KEY ( id ))
                    ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;"""
    sql_others = "create table " + chatroom + """(
                    id             int(11)      unsigned   NOT NULL  AUTO_INCREMENT,
                    Type           int(4)       unsigned   NOT NULL,
                    CreateTime     int(11)      unsigned   NOT NULL,
                    Message        LONGTEXT,
                    Des            tinyint(1)              NOT NULL,
                    PRIMARY KEY ( id ))
                    ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;"""

    sql_friends = """create table Friends(
                    id             int(11)      unsigned   NOT NULL  AUTO_INCREMENT,
                    Type           int(4)       unsigned   NOT NULL,
                    EncodeUserName TEXT,
                    UserName       TEXT,
                    Remark         TEXT,
                    Sign           TEXT,
                    Location       varchar(50),
                    PRIMARY KEY ( id ))
                    ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;"""

    sql_geodata = """create table Geodata(
                    id             int(50)      unsigned   NOT NULL  AUTO_INCREMENT,
                    code           bigint(50)      unsigned   NOT NULL,
                    name           varchar(50),
                    PRIMARY KEY ( id ))
                    ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;"""

    with sqlInit.MysqlInit() as mysql_cur:
        if chatroom == "":
            if type == 4:
                mysql_cur.execute("DROP TABLE IF EXISTS Friends")
                mysql_cur.execute(sql_friends)
            elif type == 5:
                mysql_cur.execute("DROP TABLE IF EXISTS Geodata")
                mysql_cur.execute(sql_geodata)
        else:
            mysql_cur.execute("DROP TABLE IF EXISTS " + chatroom)
            if type == 1:
                mysql_cur.execute(sql_group)
            else:
                mysql_cur.execute(sql_others)
Пример #5
0
def GetWXID(chatroom):
    '''
    返回微信号
    从mysql中获取,速度快但只能查找已导入的朋友信息
    '''
    sql = "select UserName from friends where EncodeUserName='******'"
    with sqlInit.MysqlInit() as mysql_cur:
        mysql_cur.execute(sql)
        result = mysql_cur.fetchone()
        if result != None:
            return result[0]
        else:
            return ""
Пример #6
0
def MostEmoji():
    chatrooms_group = getChat.GetChatrooms(typename=1)
    chatrooms_single = getChat.GetChatrooms(typename=2)
    chatrooms_all = chatrooms_group + chatrooms_single
    pattern = re.compile(' md5="(.*?)"')
    emoji_dict_to = {}
    emoji_dict_from = {}
    for chatroom in chatrooms_all:
        sql = "SELECT Message,CreateTime as num FROM " + chatroom + " WHERE Type=47 and Des=0"
        with sqlInit.MysqlInit() as mysql_cur:
            mysql_cur.execute(sql)
            result = mysql_cur.fetchall()
            for row in result:
                emoji_md5 = pattern.findall(row[0])[0]
                if len(emoji_md5) > 0:
                    if emoji_md5 in emoji_dict_to.keys():
                        emoji_dict_to[emoji_md5][0] += 1
                    else:
                        emoji_dict_to[emoji_md5] = [1, chatroom, row[1]]
    sorted_list_to = sorted(emoji_dict_to.items(),
                            key=lambda x: x[1][0],
                            reverse=True)
    print(sorted_list_to)
    for chatroom in chatrooms_single:
        sql = "SELECT Message,CreateTime as num FROM " + chatroom + " WHERE Type=47 and Des=1"
        with sqlInit.MysqlInit() as mysql_cur:
            mysql_cur.execute(sql)
            result = mysql_cur.fetchall()
            for row in result:
                emoji_md5 = pattern.findall(row[0])[0]
                if len(emoji_md5) > 0:
                    if emoji_md5 in emoji_dict_from.keys():
                        emoji_dict_from[emoji_md5][0] += 1
                    else:
                        emoji_dict_from[emoji_md5] = [1, chatroom, row[1]]
    sorted_list_from = sorted(emoji_dict_from.items(),
                              key=lambda x: x[1][0],
                              reverse=True)
    print(sorted_list_from)
Пример #7
0
def GetGroupData(chatroom):
    '''
    将单个群聊的消息导入到数据库
    '''
    pattern1 = re.compile('fromusername.*?"(.*?)"')
    pattern2 = re.compile('"(.*?)"')
    well_tempered_data = []
    with sqlInit.SqliteInit() as sqlite_cur:
        result = sqlite_cur.execute(
            "SELECT Type, CreateTime, Message, Des from " + chatroom)
        for row in result:
            Type = row[0]
            CreateTime = row[1]
            Des = row[3]
            if (Type == 10000) or (Type == 10002) or (Type == 1000):
                if ("撤回" in row[2]) or ("recalled a message" in row[2]):
                    Message = "撤回消息"
                    if (len(pattern2.findall(row[2])) > 0):
                        SentFrom = pattern2.findall(row[2])[0]
                    else:
                        SentFrom = "我"
                        Des = 0
                else:
                    SentFrom = "system"
                    Message = row[2].replace("\n", "")
            elif Des == 0:
                SentFrom = "我"
                Message = row[2].replace("\n", "")
            elif "<" in row[2].split(":", 1)[0]:
                if len(pattern1.findall(row[2])) > 0:
                    SentFrom = pattern1.findall(row[2])[0]
                    Message = row[2].replace("\n", "")
                else:
                    SentFrom = "unknow"
                    Message = row[2]
            elif len(pattern1.findall(row[2])) > 0:
                SentFrom = pattern1.findall(row[2])[0]
                Message = row[2].replace(SentFrom + ":", '').replace("\n", "")
            else:
                SentFrom = row[2].split(":", 1)[0]
                Message = row[2].split(":", 1)[1].replace("\n", "")

            well_tempered_data.append(
                [Type, CreateTime, SentFrom, Message, Des])
    sql = "INSERT INTO " + chatroom + "(Type,CreateTime,SentFrom,Message,Des) VALUES(%s,%s,%s,%s,%s)"
    with sqlInit.MysqlInit() as mysql_cur:
        for line in well_tempered_data:
            mysql_cur.execute(sql, line)
Пример #8
0
def MostDay(filename="mostday_to", Des=0):
    '''
    发/收信息最多的一天
    '''
    chatrooms_group = getChat.GetChatrooms(typename=1)
    chatrooms_single = getChat.GetChatrooms(typename=2)
    chatrooms_all = chatrooms_group + chatrooms_single
    CreateTime_counter = {}
    for i in chatrooms_single:
        for j in getChat.GetData(i, ["CreateTime"], Desname=Des):
            time_array = time.localtime(j[0])
            CreateTime = time.strftime("%Y-%m-%d", time_array)
            if CreateTime in CreateTime_counter:
                CreateTime_counter[CreateTime] += 1
            else:
                CreateTime_counter[CreateTime] = 1
    sorted_list = sorted(CreateTime_counter.items(),
                         key=operator.itemgetter(1),
                         reverse=True)

    format_time1 = sorted_list[0][0] + ' 00:00:00'
    format_time2 = sorted_list[0][0] + ' 23:59:59'
    time1 = int(time.mktime(time.strptime(format_time1, "%Y-%m-%d %H:%M:%S")))
    time2 = int(time.mktime(time.strptime(format_time2, "%Y-%m-%d %H:%M:%S")))
    chat_with = {}
    my_message = []

    with sqlInit.MysqlInit() as mysql_cur:
        for i in chatrooms_all:
            temp_list = []
            sql = "select Message,Des,Type from " + i + " where CreateTime>=" + str(
                time1) + " and CreateTime<=" + str(time2)
            mysql_cur.execute(sql)
            result = mysql_cur.fetchall()
            for row in result:
                if i != "Chat_b7ebbe67d8f64c77cda5415f4d749cc6" and row[
                        1] == Des and row[2] == 1:
                    temp_list.append(row[0])
                if row[1] == Des and row[2] == 1:
                    my_message.append(row[0])
            if len(temp_list) > 0:
                chat_with[i] = temp_list

    with open("../../output/" + filename + ".txt", "w+",
              encoding="utf-8") as f:
        for key, value in chat_with.items():
            for i in value:
                f.write(key + "," + i + "\n")
Пример #9
0
def GetChatrooms(typename=0):
    '''
    获取指定类型的聊天
    1:群组,2:个人,3:公众号
    '''
    chatrooms = []
    if typename == 0:
        sql = "select EncodeUserName from friends"
    else: 
        sql = "select EncodeUserName from friends where Type="+str(typename)
    with sqlInit.MysqlInit() as mysql_cur:
        mysql_cur.execute(sql)
        result = mysql_cur.fetchall()
        for row in result:
            chatrooms.append(row[0])
    return chatrooms
Пример #10
0
def GeoAll(chartname="", filename="geo_ana_all", typename=0, Des=2):
    '''
    chartname:str,图表名
    filename:str,文件名,存储在output文件夹下
    typename:int,0:全部,1:群组,2:个人,3:公众号
    Des:0:发出,1:接收,2:全部
    '''
    message_list = []
    counter0 = 0
    chatrooms = getChat.GetChatrooms(typename=1)
    chatrooms.extend(getChat.GetChatrooms(typename=2))
    for chatroom in chatrooms:
        for row in getChat.GetData(chatroom=chatroom,
                                   columns=["Message", "Type"],
                                   Desname=Des):
            if row[1] == 1:
                message_list.append(row[0])
                counter0 += 1
    print("数据量:", counter0)
    joined_message = ",".join(message_list)
    cutted_message = []
    for i in jieba.lcut(joined_message):
        if len(i) > 1:
            cutted_message.append(i)

    name_id_dict = {}
    sql = "select id,name from Geodata"
    with sqlInit.MysqlInit() as mysql_cur:
        mysql_cur.execute(sql)
        result = mysql_cur.fetchall()
        for row in result:
            name_id_dict[row[1]] = row[0]

    word_counter_dict = {}
    counter1 = 0
    for word in cutted_message:
        if word in name_id_dict.keys():
            counter1 += 1
            if not word in word_counter_dict:
                word_counter_dict[word] = 1
            else:
                word_counter_dict[word] += 1
    sorted_list = sorted(word_counter_dict.items(),
                         reverse=True,
                         key=operator.itemgetter(1))
    return sorted_list
Пример #11
0
def GetData(chatroom,columns=["Message"],Desname=2):
    '''
    获取单个聊天的数据
    chatroom:str;
    columns:list,["id","Type","SentFrom","CreateTime","Message","Des"]中选择,其中个人与公众号没有SentFrom;
    Desname:int, 0:发出,1:接收,2:全部。
    '''
    Messages_list = []
    if Desname == 2:
        sql = "select "+",".join(columns)+" from "+chatroom
    else:
        sql = "select "+",".join(columns)+" from "+chatroom+" where Des="+str(Desname)
    with sqlInit.MysqlInit() as mysql_cur:
        mysql_cur.execute(sql)
        result = mysql_cur.fetchall()
        for row in result:
            Messages_list.append(row)
    return Messages_list
Пример #12
0
def GeoMap(filename="geo_ana"):
    f = open("geo.txt", "r", encoding="utf-8")
    params = f.readline()
    f.close()
    place_list = re.findall("[(](.*?)[)]", params)
    name_counter_dict = {}
    for i in place_list:
        name_counter_dict[i.split(",")[0].strip('"').strip("'")] = int(
            i.split(",")[1].strip("'"))
    tempered_name_counter_dict = {}
    for key, value in name_counter_dict.items():
        if len(key) > 2 and (key[-1] == "市" or key[-1] == "省"):
            if key[:-1] in tempered_name_counter_dict.keys():
                tempered_name_counter_dict[
                    key[:-1]] = tempered_name_counter_dict[key[:-1]] + value
            else:
                tempered_name_counter_dict[key[:-1]] = value
        elif key in [
                '上合', '南山区', '翻身', '科技园', '桃园', '光明', '大学城', '八卦岭', '龙华', '西乡',
                '华侨城', '梧桐山', '大冲', '沙井', '红树湾'
        ]:
            if "深圳" in tempered_name_counter_dict.keys():
                tempered_name_counter_dict[
                    "深圳"] = tempered_name_counter_dict["深圳"] + value
            else:
                tempered_name_counter_dict["深圳"] = value
        elif key in ['中关村']:
            if "北京" in tempered_name_counter_dict.keys():
                tempered_name_counter_dict[
                    "北京"] = tempered_name_counter_dict["北京"] + value
            else:
                tempered_name_counter_dict["北京"] = value
        elif key in ['虹桥']:
            if "上海" in tempered_name_counter_dict.keys():
                tempered_name_counter_dict[
                    "上海"] = tempered_name_counter_dict["上海"] + value
            else:
                tempered_name_counter_dict["上海"] = value
        else:
            tempered_name_counter_dict[key] = value

    name_code_dict = {}
    code_name_dict = {}
    code_counter_dict = {}
    simplified_code_counter_dict = {}
    simplified_name_counter_dict = {}
    final_data = []
    sql = "select code,name from Geodata"
    with sqlInit.MysqlInit() as mysql_cur:
        mysql_cur.execute(sql)
        result = mysql_cur.fetchall()
        for row in result:
            name_code_dict[row[1]] = row[0]
            code_name_dict[row[0]] = row[1]
    for key, value in tempered_name_counter_dict.items():
        code_counter_dict[name_code_dict[key]] = value
    for key, value in code_counter_dict.items():
        if len(str(key)) <= 4:
            simplified_code_counter_dict[key] = value
        else:
            if int(str(key)[:4]) in simplified_code_counter_dict.keys():
                simplified_code_counter_dict[int(
                    str(key)[:4])] = simplified_code_counter_dict[int(
                        str(key)[:4])] + value
            else:
                simplified_code_counter_dict[int(str(key)[:4])] = value
    for key, value in simplified_code_counter_dict.items():
        with sqlInit.GeoSqlInit() as sqlite_cur:
            if len(str(key)) == 2:
                sql1 = "select name from province where code=" + str(key)
                fetchResult1 = sqlite_cur.execute(sql1)
                for row in fetchResult1:
                    simplified_name_counter_dict[row[0]] = value
                    if "自治区" in row[0]:
                        final_data.append((row[0].strip("自治区"), value))
                    else:
                        final_data.append((row[0][:-1], value))
            elif len(str(key)) == 4:
                sql2 = "select name from city where code=" + str(key)
                fetchResult2 = sqlite_cur.execute(sql2)
                for row in fetchResult2:
                    if row[0] == "市辖区":
                        sql3 = "select name from province where code=" + str(
                            key)[:2]
                        fetchResult3 = sqlite_cur.execute(sql3)
                        for row2 in fetchResult3:
                            simplified_name_counter_dict[row2[0]] = value
                            final_data.append((row2[0], value))
                    elif row[0] == "襄阳市":
                        simplified_name_counter_dict[row[0][:-1]] = value
                        final_data.append((row[0][:-1], value))
                    else:
                        simplified_name_counter_dict[row[0]] = value
                        final_data.append((row[0], value))
    data = final_data
    geo = Geo(
        "",
        "",
        title_color="#fff",
        title_pos="center",
        width=1200,
        height=600,
        background_color="#404a59",
    )
    attr, value = geo.cast(data)
    geo.add("",
            attr,
            value,
            visual_range=[0, 200],
            visual_text_color="#fff",
            symbol_size=15,
            type="heatmap",
            is_visualmap=True,
            maptype='china',
            coordinate_region='中国')
    geo.render(path="./output/" + filename + ".html")
    geo.render(path="./output/" + filename + ".pdf")
    print("已生成图")
Пример #13
0
def Lonelydude(filename="lonelydude",typename=2):
    '''
    用于获取发出但没有收到回复的消息和收到但没有回复对方的消息
    filename:str,文件名,存储在output文件夹下
    typename:int,0:全部,1:群组,2:个人,3:公众号
    '''
    if typename==0:
        chatrooms_group = getChat.GetChatrooms(typename=1)
        chatrooms_single = getChat.GetChatrooms(typename=2)
        chatrooms = chatrooms_group + chatrooms_single
    else:
        chatrooms = getChat.GetChatrooms(typename=typename)
    CreateTime_counter_to = {}
    CreateTime_counter_from = {}
    for i in chatrooms:
        for j in getChat.GetData(i,["CreateTime"],Desname=0):
            time_array = time.localtime(j[0])
            CreateTime = time.strftime("%Y-%m-%d", time_array)
            if CreateTime in CreateTime_counter_to:
                CreateTime_counter_to[CreateTime] += 1
            else:
                CreateTime_counter_to[CreateTime] = 1
        for k in getChat.GetData(i,["CreateTime"],Desname=1):
            time_array = time.localtime(k[0])
            CreateTime = time.strftime("%Y-%m-%d", time_array)
            if CreateTime in CreateTime_counter_from:
                CreateTime_counter_from[CreateTime] += 1
            else:
                CreateTime_counter_from[CreateTime] = 1
    no_response = []
    no_reply = []
    no_response = [i for i in CreateTime_counter_to.keys() if i not in CreateTime_counter_from.keys()]
    no_reply = [i for i in CreateTime_counter_from.keys() if i not in CreateTime_counter_to.keys()]
    print(no_response)
    print(no_reply)
    no_response_with = {}
    for i in no_response:
        format_time1 = i+' 00:00:00'
        format_time2 = i+' 23:59:59'
        time1 = int(time.mktime(time.strptime(format_time1, "%Y-%m-%d %H:%M:%S")))
        time2 = int(time.mktime(time.strptime(format_time2, "%Y-%m-%d %H:%M:%S")))
        with sqlInit.MysqlInit() as mysql_cur:
            for j in chatrooms:
                temp_list = []
                sql = "select Message from "+j+" where CreateTime>="+str(time1)+" and CreateTime<="+str(time2)+" and Des=0"
                mysql_cur.execute(sql)
                result = mysql_cur.fetchall()
                for row in result:
                    temp_list.append(row[0])
                if len(temp_list)>0:
                    no_response_with[j] = temp_list

    no_reply_with = {}
    for i in no_reply:
        format_time1 = i+' 00:00:00'
        format_time2 = i+' 23:59:59'
        time1 = int(time.mktime(time.strptime(format_time1, "%Y-%m-%d %H:%M:%S")))
        time2 = int(time.mktime(time.strptime(format_time2, "%Y-%m-%d %H:%M:%S")))
        with sqlInit.MysqlInit() as mysql_cur:
            for j in chatrooms:
                temp_list = []
                sql = "select Message from "+j+" where CreateTime>="+str(time1)+" and CreateTime<="+str(time2)+" and Des=1"
                mysql_cur.execute(sql)
                result = mysql_cur.fetchall()
                for row in result:
                    temp_list.append(row[0])
                if len(temp_list)>0:
                    no_reply_with[j] = temp_list

    with open("../../output/"+filename+".txt","w+",encoding="utf-8") as f:
        f.write("未获得回复:\n")
        for key,value in no_response_with.items():
            for i in value:
                f.write(key+","+i+"\n")
        f.write("\n未回复对方:\n")
        for key,value in no_reply_with.items():
            for i in value:
                f.write(key+","+i+"\n")
Пример #14
0
def GetGeoData():
    '''
    将地理信息数据导入到数据库
    '''
    geo_dict = {}
    with sqlInit.SqliteInit('../../geodata/data.sqlite') as sqlite_cur:
        fetchResult1 = sqlite_cur.execute("select code,name from area")
        for row in fetchResult1:
            if len(row[1]) < 3:
                if row[1] in geo_dict.keys():
                    if len(geo_dict[row[1]]) >= len(row[0]):
                        geo_dict[row[1]] = row[0]
                else:
                    geo_dict[row[1]] = row[0]
            elif len(row[1]) == 3:
                if row[1][:2] in geo_dict.keys():
                    if len(geo_dict[row[1][:2]]) >= len(row[0]):
                        geo_dict[row[1][:2]] = row[0]
                else:
                    geo_dict[row[1][:2]] = row[0]
            elif len(row[1]) > 3:
                if row[1][:3] in geo_dict.keys():
                    if len(geo_dict[row[1][:3]]) >= len(row[0]):
                        geo_dict[row[1][:3]] = row[0]
                else:
                    geo_dict[row[1][:3]] = row[0]
        fetchResult2 = sqlite_cur.execute("select code,name from city")
        for row in fetchResult2:
            if len(row[1]) < 3:
                if row[1] in geo_dict.keys():
                    if len(geo_dict[row[1]]) >= len(row[0]):
                        geo_dict[row[1]] = row[0]
                else:
                    geo_dict[row[1]] = row[0]
            elif len(row[1]) == 3:
                if row[1][:2] in geo_dict.keys():
                    if len(geo_dict[row[1][:2]]) >= len(row[0]):
                        geo_dict[row[1][:2]] = row[0]
                else:
                    geo_dict[row[1][:2]] = row[0]
            elif len(row[1]) > 3:
                if row[1][:3] in geo_dict.keys():
                    if len(geo_dict[row[1][:3]]) >= len(row[0]):
                        geo_dict[row[1][:3]] = row[0]
                else:
                    geo_dict[row[1][:3]] = row[0]
        fetchResult3 = sqlite_cur.execute("select code,name from province")
        for row in fetchResult3:
            if len(row[1]) < 3:
                if row[1] in geo_dict.keys():
                    if len(geo_dict[row[1]]) >= len(row[0]):
                        geo_dict[row[1]] = row[0]
                else:
                    geo_dict[row[1]] = row[0]
            elif len(row[1]) == 3:
                if row[1][:2] in geo_dict.keys():
                    if len(geo_dict[row[1][:2]]) >= len(row[0]):
                        geo_dict[row[1][:2]] = row[0]
                else:
                    geo_dict[row[1][:2]] = row[0]
            elif len(row[1]) > 3:
                if row[1][:3] in geo_dict.keys():
                    if len(geo_dict[row[1][:3]]) >= len(row[0]):
                        geo_dict[row[1][:3]] = row[0]
                else:
                    geo_dict[row[1][:3]] = row[0]
        fetchResult4 = sqlite_cur.execute("select code,name from village")
        for row in fetchResult4:
            if len(row[1]) < 3:
                if row[1] in geo_dict.keys():
                    if len(geo_dict[row[1]]) >= len(row[0]):
                        geo_dict[row[1]] = row[0]
                else:
                    geo_dict[row[1]] = row[0]
            elif len(row[1]) == 3:
                if row[1][:2] in geo_dict.keys():
                    if len(geo_dict[row[1][:2]]) >= len(row[0]):
                        geo_dict[row[1][:2]] = row[0]
                else:
                    geo_dict[row[1][:2]] = row[0]
            elif len(row[1]) > 3:
                if row[1][:3] in geo_dict.keys():
                    if len(geo_dict[row[1][:3]]) >= len(row[0]):
                        geo_dict[row[1][:3]] = row[0]
                else:
                    geo_dict[row[1][:3]] = row[0]
    sql = "INSERT INTO geodata(code,name) VALUES(%s,%s)"
    print(len(geo_dict))
    with sqlInit.MysqlInit() as mysql_cur:
        for key, value in geo_dict.items():
            mysql_cur.execute(sql, [value, key])
Пример #15
0
def TypeAnalyse():
    chatrooms_single = getChat.GetChatrooms(typename=2)
    single_type_counter_to = {}
    single_type_counter_from = {}
    for i in chatrooms_single:
        sql1 = "SELECT Type,count(*) as num FROM " + i + " WHERE Des=0 GROUP BY Type"
        sql2 = "SELECT Type,count(*) as num FROM " + i + " WHERE Des=1 GROUP BY Type"
        with sqlInit.MysqlInit() as mysql_cur:
            mysql_cur.execute(sql1)
            result = mysql_cur.fetchall()
            for j in result:
                if j[0] in single_type_counter_to.keys():
                    single_type_counter_to[j[0]] += j[1]
                else:
                    single_type_counter_to[j[0]] = j[1]
        with sqlInit.MysqlInit() as mysql_cur:
            mysql_cur.execute(sql2)
            result = mysql_cur.fetchall()
            for j in result:
                if j[0] in single_type_counter_from.keys():
                    single_type_counter_from[j[0]] += j[1]
                else:
                    single_type_counter_from[j[0]] = j[1]

    if 10002 in single_type_counter_to.keys():
        if not 10000 in single_type_counter_to.keys():
            single_type_counter_to[10000] = single_type_counter_to[10002]
        else:
            single_type_counter_to[10000] += single_type_counter_to[10002]
            del single_type_counter_to[10002]
    if 10002 in single_type_counter_from.keys():
        if not 10000 in single_type_counter_from.keys():
            single_type_counter_from[10000] = single_type_counter_from[10002]
        else:
            single_type_counter_from[10000] += single_type_counter_from[10002]
            del single_type_counter_from[10002]

    if 62 in single_type_counter_to.keys():
        if not 43 in single_type_counter_to.keys():
            single_type_counter_to[43] = single_type_counter_to[62]
        else:
            single_type_counter_to[43] += single_type_counter_to[62]
            del single_type_counter_to[62]
    if 62 in single_type_counter_from.keys():
        if not 43 in single_type_counter_from.keys():
            single_type_counter_from[43] = single_type_counter_from[62]
        else:
            single_type_counter_from[43] += single_type_counter_from[62]
            del single_type_counter_from[62]
    define_dict = {
        "文字": 1,
        "图片": 3,
        "语音": 34,
        "名片": 42,
        "视频": 43,
        "表情": 47,
        "定位": 48,
        "链接": 49,
        "微信电话": 50,
        "系统消息": 10000
    }
    attr = ["文字", "表情", "图片", "视频", "语音", "名片", "定位", "链接", "微信电话", "系统消息"]
    v1 = [single_type_counter_to[define_dict[i]] for i in attr]
    v2 = [single_type_counter_from[define_dict[i]] for i in attr]
    pie = Pie("", width=1000, height=400)
    pie.add("发出",
            attr,
            v1,
            center=[25, 50],
            is_random=True,
            radius=[30, 75],
            is_legend_show=True,
            is_label_show=True,
            legend_top="bottom")
    pie.add("接收",
            attr,
            v2,
            center=[75, 50],
            is_random=True,
            radius=[30, 75],
            is_legend_show=True,
            is_label_show=True,
            legend_top="bottom")
    pie.render(path="../../output/type_ana.html")
    pie.render(path="../../output/type_ana.pdf")
Пример #16
0
def BaseAnalyse():
    '''
    个人总数
    群聊总数
    总发出消息(个人+群组)
    总发出消息(个人)
    总接收消息(个人+群组)
    总接收消息(个人)
    总撤回消息(自己)
    总撤回消息(个人)
    '''
    counter1 = 0
    counter2 = 0
    counter3 = 0
    counter4 = 0
    recall_to = {}
    recall_from = {}
    recall_to_rate = {}
    recall_from_rate = {}
    chatrooms_group = getChat.GetChatrooms(typename=1)
    chatrooms_single = getChat.GetChatrooms(typename=2)
    print("个人总数:" + str(len(chatrooms_single)))
    print("群聊总数:" + str(len(chatrooms_group)))
    chatrooms_all = chatrooms_group + chatrooms_single
    message_length_to = []
    message_length_from = []
    for chatroom in chatrooms_single:
        sql3 = "SELECT AVG(CHAR_LENGTH(Message)) FROM " + chatroom + " WHERE Type=1 and Des=0"
        sql4 = "SELECT AVG(CHAR_LENGTH(Message)) FROM " + chatroom + " WHERE Type=1 and Des=1"
        with sqlInit.MysqlInit() as mysql_cur:
            mysql_cur.execute(sql3)
            result = mysql_cur.fetchone()
            if result[0] != None:
                message_length_to.append(float(result[0]))
        with sqlInit.MysqlInit() as mysql_cur:
            mysql_cur.execute(sql4)
            result = mysql_cur.fetchone()
            if result[0] != None:
                message_length_from.append(float(result[0]))
    print("平均发出消息长度为:" + "%.2f" % np.mean(message_length_to))
    print("平均接收消息长度为:" + "%.2f" % np.mean(message_length_from))
    for chatroom in chatrooms_all:
        counter1 += getChat.GetRowNum(chatroom, Des=0)
        counter3 += getChat.GetRowNum(chatroom, Des=1)
    for chatroom in chatrooms_single:
        sql1 = "select count(*) from " + chatroom + " where Message='撤回消息' and Des=0"
        sql2 = "select count(*) from " + chatroom + " where Message='撤回消息' and Des=1"

        counter2 += getChat.GetRowNum(chatroom, Des=0)
        counter4 += getChat.GetRowNum(chatroom, Des=1)
        with sqlInit.MysqlInit() as mysql_cur:
            mysql_cur.execute(sql1)
            result = mysql_cur.fetchone()
            if result != None:
                if result[0] != 0:
                    rownum = getChat.GetRowNum(chatroom)
                    recall_to[chatroom] = result[0]
                    if rownum > 100:
                        recall_rate = 1000 * result[0] / rownum
                        recall_to_rate[chatroom] = round(recall_rate, 2)
        with sqlInit.MysqlInit() as mysql_cur:
            mysql_cur.execute(sql2)
            result = mysql_cur.fetchone()
            if result != None:
                if result[0] != 0:
                    rownum = getChat.GetRowNum(chatroom)
                    recall_from[chatroom] = result[0]
                    if rownum > 100:
                        recall_rate = 1000 * result[0] / rownum
                        recall_from_rate[chatroom] = round(recall_rate, 2)
    recall_to_sum = sum(list(recall_to.values()))
    recall_from_sum = sum(list(recall_from.values()))
    sorted_recall_to = sorted(recall_to_rate.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    sorted_recall_from = sorted(recall_from_rate.items(),
                                key=operator.itemgetter(1),
                                reverse=True)
    print(sorted_recall_to)
    print(sorted_recall_from)
    print("总共发出:" + str(counter1))
    print("总共发出(个人):" + str(counter2))
    print("总共接收:" + str(counter3))
    print("总共接收(个人):" + str(counter4))
    print("我总共撤回:" + str(recall_to_sum))
    print("总共被撤回:" + str(recall_from_sum))