def showLastDays(word, days):
    """Plot the daily frequency of ``word`` over the last ``days`` days.

    The window ends at the latest date returned by ``getTimeDomain`` for
    ``RFF.getDateList()`` and starts ``days`` days before it.  Posts inside
    the window come from ``getPostDatebyTimeDomain``; every post whose
    content contains ``word`` increments the bucket of its day and the
    result is drawn with ``drawGraphic.linePlotGraphics``.

    Args:
        word: Substring looked for in each post's content (``post[0]``).
        days: Window length in days; one bucket and one label per day.
    """
    _, enddate = getTimeDomain(RFF.getDateList())
    begdate = enddate - datetime.timedelta(days=days)
    posts = getPostDatebyTimeDomain(begdate, enddate, RFF.getPostDataList())

    # Build one zeroed bucket and one "month-day" label per day.
    feqlist = []
    timeline = []
    one_day = datetime.timedelta(days=1)
    cursor = begdate
    created = 0
    while created < days:
        feqlist.append(0)
        timeline.append("{}-{}".format(cursor.month, cursor.day))
        cursor += one_day
        created += 1

    # Each post is [content, author, time string].
    for post in posts:
        if word not in post[0]:
            continue
        posted = datetime.datetime.strptime(post[2], "%Y-%m-%d %H:%M")
        offset = (posted - begdate).days
        if offset < 0:
            # Posted before the window starts: not part of this plot.
            continue
        # Buckets are addressed as "whole-day offset minus one".  Clamp at 0
        # so a post from the first 24 hours is not turned into index -1 and
        # silently counted in the *last* day's bucket.
        slot = max(offset - 1, 0)
        if slot < len(feqlist):
            feqlist[slot] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(len(posts)) + ')', timeline, feqlist,
        '时间频率图({}->{})'.format(begdate.date(), enddate.date()))
    print('>>>>>图像加载完毕')
def showLastYears(word, years):
    """Plot the yearly frequency of ``word`` over the last ``years`` years.

    The window ends at the latest date returned by ``getTimeDomain`` for
    ``RFF.getDateList()`` and starts ``years * 365`` days before it.  A post
    whose content contains ``word`` is counted in the bucket given by the
    difference between its calendar year and the window's first year.

    Args:
        word: Substring looked for in each post's content (``post[0]``).
        years: Number of years to look back; ``years + 1`` buckets are made.
    """
    _, enddate = getTimeDomain(RFF.getDateList())
    begdate = enddate - datetime.timedelta(days=years * 365)
    posts = getPostDatebyTimeDomain(begdate, enddate, RFF.getPostDataList())

    feqlist = []
    timeline = []
    print("begdate=", begdate, "enddate=", enddate)
    index = 0
    while index <= years:
        # Bucket ``index`` collects posts of calendar year
        # ``begdate.year + index`` (see the loop below), so the label is
        # derived the same way.  Stepping a date by 365 days instead can
        # repeat a year around leap years and mislabel later buckets.
        label_year = begdate.year + index
        feqlist.append(0)
        timeline.append(str(label_year) + "年")
        print(str(label_year))
        index += 1

    # Each post is [content, author, time string].
    for post in posts:
        if word not in post[0]:
            continue
        postdate = datetime.datetime.strptime(post[2], "%Y-%m-%d %H:%M")
        satpos = postdate.year - begdate.year
        print("satpos=", satpos, "\tpostdate=", postdate, "\tbegdate=",
              begdate, "\tyear1=", postdate.year, "\tyear2=", begdate.year)
        # Ignore posts outside the bucket range instead of wrapping around
        # (negative index) or raising IndexError.
        if 0 <= satpos < len(feqlist):
            feqlist[satpos] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(len(posts)) + ')', timeline, feqlist,
        '时间频率图({}->{})'.format(begdate.year, enddate.year))
    print('>>>>>图像加载完毕')
Пример #3
0
def activeTimeAnaylize(authorname, days):
    """Plot at which hours of the day ``authorname`` posts.

    The window ends at ``RFF.queryDatasourceLatestTime()``.  When ``days`` is
    positive it starts ``days`` days earlier, otherwise it starts at
    ``RFF.queryDatasourceEarlyTime()``.  The author's posts are grouped per
    day via ``sortandget`` and ``gatherbyDays``, an hourly histogram is built
    for each day, and the histograms are added up and plotted.

    Args:
        authorname: Author passed to ``RFF.queryContainListAfterTime``.
        days: Look-back window in days; ``<= 0`` means "from the earliest
            time in the data source".
    """
    print("获取数据集中的最近时间...")
    enddate = RFF.queryDatasourceLatestTime()
    print("计算时间区间...")
    if days > 0:
        begdate = enddate - datetime.timedelta(days=days)
    else:
        begdate = RFF.queryDatasourceEarlyTime()
    print('时间区间:', begdate, '->', enddate)
    print("获取回帖列表...")
    posts = RFF.queryContainListAfterTime(authorname, str(begdate))
    print("开始统计.")

    # gatherbyDays yields entries shaped [date, [timestamps of that day]].
    per_day = gatherbyDays(sortandget(posts))

    hours = list(range(24))
    daily_histograms = []
    for day_entry in per_day:
        histogram = [0] * 24
        for stamp in day_entry[1]:
            histogram[stamp.hour] += 1
        daily_histograms.append(histogram)
        print(str(histogram))
    del per_day

    # Add the per-day histograms together hour by hour.  Note that the
    # values are plain totals; nothing is divided by the number of days.
    totals = [
        sum(histogram[hour] for histogram in daily_histograms)
        for hour in hours
    ]
    print("after add up all :\n\n", str(totals))
    drawGraphic.linePlotGraphics(
        '时间(小时)', '发帖次数', hours, totals,
        "【" + authorname + '】的活跃时间段图(共 ' + str(len(daily_histograms)) +
        " 天数据)")
Пример #4
0
def showLastDays(authorname,days):
    """Plot how many posts ``authorname`` made per day in the last ``days`` days.

    Data is loaded from ``RFF.openResult()``.  The window ends at the latest
    date reported by ``getTimeDomain`` and starts ``days`` days earlier.  The
    per-day numbers come from ``getCountByDate``.  For windows longer than 30
    days only some x-axis labels are kept (the others are empty strings) and
    the last label is replaced by the full end date.

    Args:
        authorname: Author whose posts are selected via ``getPostByAuthor``.
        days: Window length in days.  With ``days <= 0`` no buckets are
            created, so the plot receives empty lists.
    """
    print("加载任务结果文件...")
    raw = RFF.openResult()
    _, enddate = getTimeDomain(RFF.getDateList(raw))
    print("计算时间区间...")
    begdate = enddate - datetime.timedelta(days=days)
    print("解析回帖数据...")
    post_data = RFF.getPostDataList(raw)
    del raw
    # The original branched on ``days > 0`` here, but both branches performed
    # exactly this call with the same arguments.
    posts = getPostDatebyTimeDomain(begdate, enddate, post_data)
    del post_data
    print("开始统计.")
    posts = getPostByAuthor(authorname, posts)
    total = len(posts)

    feqlist = []
    timeline = []
    one_day = datetime.timedelta(days=1)
    cursor = begdate
    index = 0
    if days > 30:
        # Number of unlabeled entries that follow each labeled one.
        blanks_per_label = days / 30 - 1
        while index <= days:
            timeline.append("{}-{}".format(cursor.month, cursor.day))
            cursor += one_day
            # As in the original, the count is queried after the cursor has
            # been advanced.
            feqlist.append(getCountByDate(cursor, posts))
            index += 1
            blanks = 0
            while blanks < blanks_per_label and index <= days:
                timeline.append("")
                cursor += one_day
                feqlist.append(getCountByDate(cursor, posts))
                index += 1
                blanks += 1
        # Show the full date on the last tick.  The original used ``==``
        # here, which compared and discarded the result instead of assigning.
        timeline[-1] = str((cursor - one_day).date())
    else:
        while index < days:
            timeline.append("{}-{}".format(cursor.month, cursor.day))
            cursor += one_day
            feqlist.append(getCountByDate(cursor, posts))
            index += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(total) + ')', timeline, feqlist,
        "【" + authorname + '】的活跃程度图(' + str(begdate.date()) + "->" +
        str(enddate.date()) + ")")
    print('>>>>>图像加载完毕')
def showLastDays(word, days):
    """Plot the daily frequency of ``word`` over the last ``days`` days.

    Data is loaded from ``RFF.openResult()``.  The window ends at the latest
    date reported by ``getTimeDomain`` and starts ``days`` days earlier.  For
    windows longer than 30 days only some x-axis labels are kept (the others
    are empty strings) and the last label is replaced by the full end date.

    Args:
        word: Substring looked for in each post's content (``post[0]``).
        days: Window length in days.
    """
    print("加载任务结果文件...")
    raw = RFF.openResult()
    _, enddate = getTimeDomain(RFF.getDateList(raw))
    print("计算时间区间...")
    begdate = enddate - datetime.timedelta(days=days)
    print("解析回帖数据...")
    post_data = RFF.getPostDataList(raw)
    del raw
    posts = getPostDatebyTimeDomain(begdate, enddate, post_data)
    del post_data
    print("开始统计.")

    # Build the zeroed buckets and their x-axis labels.
    feqlist = []
    timeline = []
    one_day = datetime.timedelta(days=1)
    cursor = begdate
    index = 0
    if days > 30:
        # Number of unlabeled entries that follow each labeled one.
        blanks_per_label = days / 30 - 1
        while index <= days:
            feqlist.append(0)
            timeline.append("{}-{}".format(cursor.month, cursor.day))
            cursor += one_day
            index += 1
            blanks = 0
            while blanks < blanks_per_label and index <= days:
                feqlist.append(0)
                timeline.append("")
                cursor += one_day
                index += 1
                blanks += 1
        # Show the full date on the last tick.  The original used ``==``
        # here, which compared and discarded the result instead of assigning.
        timeline[-1] = str((cursor - one_day).date())
    else:
        while index < days:
            feqlist.append(0)
            timeline.append("{}-{}".format(cursor.month, cursor.day))
            cursor += one_day
            index += 1

    # Each post is [content, author, time string].
    for post in posts:
        if word not in post[0]:
            continue
        posted = datetime.datetime.strptime(post[2], "%Y-%m-%d %H:%M")
        offset = (posted - begdate).days
        if offset < 0:
            # Posted before the window starts: not part of this plot.
            continue
        # Buckets are addressed as "whole-day offset minus one".  Clamp at 0
        # so a post from the first 24 hours is not turned into index -1 and
        # silently counted in the *last* bucket.
        slot = max(offset - 1, 0)
        if slot < len(feqlist):
            feqlist[slot] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(len(posts)) + ')', timeline, feqlist,
        "【" + word + '】的时间频率图(' + str(begdate.date()) + "->" +
        str(enddate.date()) + ")")
    print('>>>>>图像加载完毕')
Пример #6
0
def showLastDays(authorname, days):
    """Plot how many posts ``authorname`` made per day in the last ``days`` days.

    The window ends at ``RFF.queryDatasourceLatestTime()``.  When ``days`` is
    positive it starts ``days`` days earlier, otherwise it starts at
    ``RFF.queryDatasourceEarlyTime()``.  The per-day numbers come from
    ``getCountByDate``.  For windows longer than 30 days only some x-axis
    labels are kept (the others are empty strings) and the last label is
    replaced by the full end date.

    Args:
        authorname: Author passed to ``RFF.queryContainListAfterTime``.
        days: Window length in days.  With ``days <= 0`` the posts are still
            queried, but no buckets are created, so the plot receives empty
            lists.
    """
    print("获取数据集中的最近时间...")
    enddate = RFF.queryDatasourceLatestTime()
    print("计算时间区间...")
    if days > 0:
        begdate = enddate - datetime.timedelta(days=days)
    else:
        begdate = RFF.queryDatasourceEarlyTime()
    print('时间区间:', begdate, '->', enddate)
    print("获取回帖列表...")
    posts = RFF.queryContainListAfterTime(authorname, str(begdate))
    total = len(posts)
    print("开始统计.")

    feqlist = []
    timeline = []
    one_day = datetime.timedelta(days=1)
    cursor = begdate
    index = 0
    if days > 30:
        # Number of unlabeled entries that follow each labeled one.
        blanks_per_label = days / 30 - 1
        while index <= days:
            timeline.append("{}-{}".format(cursor.month, cursor.day))
            cursor += one_day
            # As in the original, the count is queried after the cursor has
            # been advanced.
            feqlist.append(getCountByDate(cursor, posts))
            index += 1
            blanks = 0
            while blanks < blanks_per_label and index <= days:
                timeline.append("")
                cursor += one_day
                feqlist.append(getCountByDate(cursor, posts))
                index += 1
                blanks += 1
        # Show the full date on the last tick.  The original used ``==``
        # here, which compared and discarded the result instead of assigning.
        timeline[-1] = str((cursor - one_day).date())
    else:
        while index < days:
            timeline.append("{}-{}".format(cursor.month, cursor.day))
            cursor += one_day
            feqlist.append(getCountByDate(cursor, posts))
            index += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(total) + ')', timeline, feqlist,
        "【" + authorname + '】的活跃程度图(' + str(begdate.date()) + "->" +
        str(enddate.date()) + ")")
    print('>>>>>图像加载完毕')
def showLastDays(word,days):
    """Plot the daily frequency of ``word`` over the last ``days`` days.

    The window ends at ``RFF.queryDatasourceLatestTime()`` and starts
    ``days`` days earlier.  Posts come from
    ``RFF.queryWordContainListAfterTime``.  For windows longer than 30 days
    only some x-axis labels are kept (the others are empty strings) and the
    last label is replaced by the full end date.

    Args:
        word: Substring looked for in each post's content (``post[3]``).
        days: Window length in days.
    """
    print("获取数据集中的最近时间...")
    enddate = RFF.queryDatasourceLatestTime()
    print("计算时间区间...")
    begdate = enddate - datetime.timedelta(days=days)
    print('时间区间:',begdate,'->',enddate)
    print("获取回帖列表...")
    posts = RFF.queryWordContainListAfterTime(str(begdate))
    print("解析回帖数据...")

    # Build the zeroed buckets and their x-axis labels.
    feqlist = []
    timeline = []
    one_day = datetime.timedelta(days=1)
    cursor = begdate
    index = 0
    if days > 30:
        # Number of unlabeled entries that follow each labeled one.
        blanks_per_label = days / 30 - 1
        while index <= days:
            feqlist.append(0)
            timeline.append("{}-{}".format(cursor.month, cursor.day))
            cursor += one_day
            index += 1
            blanks = 0
            while blanks < blanks_per_label and index <= days:
                feqlist.append(0)
                timeline.append("")
                cursor += one_day
                index += 1
                blanks += 1
        # Show the full date on the last tick.  The original used ``==``
        # here, which compared and discarded the result instead of assigning.
        timeline[-1] = str((cursor - one_day).date())
    else:
        while index < days:
            feqlist.append(0)
            timeline.append("{}-{}".format(cursor.month, cursor.day))
            cursor += one_day
            index += 1

    # Each post is [thread link, forum name, author, content, post time,
    # replied-to user, page]; the post time is used directly in date
    # arithmetic below.
    for post in posts:
        if word not in post[3]:
            continue
        offset = (post[4] - begdate).days
        if offset < 0:
            # Posted before the window starts: not part of this plot.
            continue
        # Buckets are addressed as "whole-day offset minus one".  Clamp at 0
        # so a post from the first 24 hours is not turned into index -1 and
        # silently counted in the *last* bucket.
        slot = max(offset - 1, 0)
        if slot < len(feqlist):
            feqlist[slot] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(len(posts)) + ')', timeline, feqlist,
        "【" + word + '】的时间频率图(' + str(begdate.date()) + "->" +
        str(enddate.date()) + ")")
    print('>>>>>图像加载完毕')
def singleWordTF(word, datalist, scale=30):
    """Plot how often ``word`` appears over time in thread titles and replies.

    The overall time range comes from ``getTimeDomain(RFF.getDateList())``
    and is cut into buckets of ``scale`` days.  Every title or reply whose
    text contains ``word`` increments the bucket of its timestamp.

    Args:
        word: Substring looked for in titles and reply contents.
        datalist: List of threads shaped
            ``[[title, author, time], [[reply, author, time], ...]]``.
            Replies with fewer than three fields are ignored.
        scale: Bucket width in days.
    """
    # Determine the earliest/latest timestamps that bound the plot.
    begtime, endtime = getTimeDomain(RFF.getDateList())
    print("对比日期范围:", begtime, "->", endtime)
    blocks = int((endtime - begtime).days / scale)

    print('>>>>>开始处理.....')
    feqlist = []
    timeline = []
    step = datetime.timedelta(days=scale)
    bucket_start = begtime
    for _ in range(blocks + 1):
        feqlist.append(0)
        timeline.append(str(bucket_start.date()))
        bucket_start += step

    total = 0
    for post in datalist:
        # Treat the title and every well-formed reply uniformly: each entry
        # is [text, author, time string].
        entries = [post[0]]
        entries.extend(reply for reply in post[1] if len(reply) >= 3)
        total += len(entries)
        for entry in entries:
            if word not in entry[0]:
                continue
            stamp = datetime.datetime.strptime(entry[2], "%Y-%m-%d %H:%M")
            slot = int((stamp - begtime).days / scale)
            # ``datalist`` is supplied independently of the date range, so
            # skip timestamps that fall outside the buckets instead of
            # wrapping around (negative index) or raising IndexError.
            if 0 <= slot < len(feqlist):
                feqlist[slot] += 1

    print('>>>>>处理完成,加载图像中.....')
    print(str(feqlist))
    print(str(timeline), str(feqlist))
    # Draw the chart.  ``total`` is the number of titles plus well-formed
    # replies examined; the original displayed the thread count multiplied
    # by the length of the first title and failed on an empty ``datalist``.
    drawGraphic.linePlotGraphics(
        '时间',
        '出现次数(帖子/回帖总数:' + str(total) + ')',
        timeline, feqlist, '时间频率图(' + str(begtime) + "->" + str(endtime) + ")")
    print('>>>>>图像加载完毕')
Пример #9
0
def activeTimeAnaylize(authorname,days):
    """Plot at which hours of the day ``authorname`` posts.

    Data is loaded from ``RFF.openResult()``.  The window ends at the latest
    date reported by ``getTimeDomain``; when ``days`` is positive it starts
    ``days`` days earlier, otherwise the earliest reported date is used.  The
    author's posts are grouped per day via ``sortandget`` and
    ``gatherbyDays``, an hourly histogram is built for each day, and the
    histograms are added up and plotted.

    Args:
        authorname: Author whose posts are selected via ``getPostByAuthor``.
        days: Look-back window in days; ``<= 0`` means the whole date range.
    """
    raw = RFF.openResult()
    begdate, enddate = getTimeDomain(RFF.getDateList(raw))
    post_data = RFF.getPostDataList(raw)
    del raw
    if days > 0:
        begdate = enddate - datetime.timedelta(days=days)
    posts = getPostDatebyTimeDomain(begdate, enddate, post_data)
    del post_data

    # getPostByAuthor yields [[content, time], ...]; gatherbyDays turns the
    # sorted data into entries shaped [date, [timestamps of that day]].
    posts = getPostByAuthor(authorname, posts)
    per_day = gatherbyDays(sortandget(posts))

    hours = list(range(24))
    daily_histograms = []
    for day_entry in per_day:
        histogram = [0] * 24
        for stamp in day_entry[1]:
            histogram[stamp.hour] += 1
        daily_histograms.append(histogram)
        print(str(histogram))
    del per_day

    # Add the per-day histograms together hour by hour.  Note that the
    # values are plain totals; nothing is divided by the number of days.
    totals = [
        sum(histogram[hour] for histogram in daily_histograms)
        for hour in hours
    ]
    print("after add up all :\n\n", str(totals))
    drawGraphic.linePlotGraphics(
        '时间(小时)', '发帖次数', hours, totals,
        "【" + authorname + '】的活跃时间段图(共 ' + str(len(daily_histograms)) +
        " 天数据)")
Пример #10
0
def showLastYears(word,years):
    """Plot the yearly frequency of ``word`` over the last ``years`` years.

    The window ends at ``RFF.queryDatasourceLatestTime()`` and starts
    ``years * 365`` days earlier.  Posts come from
    ``RFF.queryWordContainListAfterTime``.  A post whose content contains
    ``word`` is counted in the bucket given by the difference between its
    calendar year and the window's first year.

    Args:
        word: Substring looked for in each post's content (``post[3]``).
        years: Number of years to look back; ``years + 1`` buckets are made.
    """
    print("获取数据集中的最近时间...")
    enddate = RFF.queryDatasourceLatestTime()
    print("计算时间区间...")
    begdate = enddate - datetime.timedelta(days=years*365)
    print('时间区间:',begdate,'->',enddate)
    print("获取回帖列表...")
    posts = RFF.queryWordContainListAfterTime(str(begdate))
    print("解析回帖数据...")

    feqlist = []
    timeline = []
    print("begdate=",begdate,"enddate=",enddate)
    index = 0
    while index <= years:
        # Bucket ``index`` collects posts of calendar year
        # ``begdate.year + index`` (see the loop below), so the label is
        # derived the same way.  Stepping a date by 365 days instead can
        # repeat a year around leap years and mislabel later buckets.
        label_year = begdate.year + index
        feqlist.append(0)
        timeline.append(str(label_year) + "年")
        print(str(label_year))
        index += 1

    # Each post is [thread link, forum name, author, content, post time,
    # replied-to user, page].
    for post in posts:
        if word not in post[3]:
            continue
        satpos = post[4].year - begdate.year
        # Ignore posts outside the bucket range instead of wrapping around
        # (negative index) or raising IndexError.
        if 0 <= satpos < len(feqlist):
            feqlist[satpos] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(len(posts)) + ')', timeline, feqlist,
        "【" + word + '】的时间频率图(' + str(begdate.year) + "->" +
        str(enddate.year) + ")")
    print('>>>>>图像加载完毕')




#该函数为辅助函数,用于找出时间区间
#返回值:最早时间,最近时间