def activeTimeAnaylize(authorname, days):
    """Plot how many posts by ``authorname`` fall into each hour of the day.

    The posts returned by ``RFF.queryContainListAfterTime`` are passed
    through ``sortandget`` and ``gatherbyDays``; each resulting entry is
    turned into a 24-slot histogram (one slot per hour), the histograms are
    summed slot by slot, and the totals are drawn as a line chart.

    Args:
        authorname: Author whose posts are queried; also shown in the title.
        days: Window size in days, counted back from the latest timestamp in
            the data source. A value <= 0 starts the window at the earliest
            timestamp in the data source instead.

    Note:
        The plotted values are plain per-hour totals over all days; nothing
        is divided by the number of days, even though the original comment
        called this step an "average".
    """
    print("获取数据集中的最近时间...")
    enddate = RFF.queryDatasourceLatestTime()

    print("计算时间区间...")
    if days > 0:
        begdate = enddate - datetime.timedelta(days=days)
    else:
        begdate = RFF.queryDatasourceEarlyTime()
    print('时间区间:', begdate, '->', enddate)

    print("获取回帖列表...")
    posts = RFF.queryContainListAfterTime(authorname, str(begdate))

    print("开始统计.")
    # Per the original comment the grouped data is expected to look like
    # [[date, [timestamp, ...]], ...] -- only entry[1] is read here.
    grouped = gatherbyDays(sortandget(posts))

    hours = list(range(24))

    # One 24-slot histogram per grouped entry.
    daily_histograms = []
    for entry in grouped:
        histogram = [0] * 24
        for stamp in entry[1]:
            histogram[stamp.hour] += 1
        daily_histograms.append(histogram)
        print(str(histogram))
    del grouped  # no longer needed; release before plotting

    # Add the per-day histograms together, hour by hour.
    hourly_totals = [
        sum(histogram[hour] for histogram in daily_histograms)
        for hour in hours
    ]
    print("after add up all :\n\n", str(hourly_totals))

    drawGraphic.linePlotGraphics(
        '时间(小时)', '发帖次数', hours, hourly_totals,
        "【" + authorname + '】的活跃时间段图(共 ' + str(len(daily_histograms)) +
        " 天数据)")
def showLastDays(authorname, days):
    """Plot a per-day activity line chart for ``authorname``.

    For every day slot in the window the count is obtained from
    ``getCountByDate`` and plotted against a "month-day" label. When the
    window is longer than 30 days only some slots get a label (the others
    get an empty string) so the x axis stays readable, and the final slot is
    labelled with the full date.

    Args:
        authorname: Author whose posts are queried; also shown in the title.
        days: Window size in days, counted back from the latest timestamp in
            the data source. A value <= 0 means "from the earliest timestamp
            in the data source"; the number of day slots is then derived
            from the distance between the earliest and latest timestamps.
    """
    print("获取数据集中的最近时间...")
    enddate = RFF.queryDatasourceLatestTime()

    print("计算时间区间...")
    if days > 0:
        begdate = enddate - datetime.timedelta(days=days)
        span_days = days
    else:
        begdate = RFF.queryDatasourceEarlyTime()
        # Fix: the original kept the non-positive ``days`` here, so no slot
        # was ever generated and the chart came out empty.
        span_days = (enddate - begdate).days
    print('时间区间:', begdate, '->', enddate)

    print("获取回帖列表...")
    spostdate = RFF.queryContainListAfterTime(authorname, str(begdate))
    llen = len(spostdate)
    print("开始统计.")

    one_day = datetime.timedelta(days=1)
    feqlist = []
    timeline = []
    xdate = begdate
    x = 0

    # NOTE(review): in every slot the label is taken from ``xdate`` *before*
    # it is advanced while the count is requested for ``xdate`` *after* it is
    # advanced. This pairing is kept exactly as in the original -- confirm
    # against getCountByDate whether the one-day offset is intended.
    if span_days > 30:
        # Number of unlabeled slots that follow each labeled slot.
        blanks_per_label = span_days / 30 - 1
        while x <= span_days:
            timeline.append(str(xdate.month) + "-" + str(xdate.day))
            xdate += one_day
            feqlist.append(getCountByDate(xdate, spostdate))
            x += 1

            blanks = 0
            while blanks < blanks_per_label and x <= span_days:
                timeline.append("")
                xdate += one_day
                feqlist.append(getCountByDate(xdate, spostdate))
                x += 1
                blanks += 1

        # Fix: the original used ``==`` here, a comparison whose result was
        # discarded, so the last slot never received its full-date label.
        xdate -= one_day
        timeline[-1] = str(xdate.date())
    else:
        while x < span_days:
            timeline.append(str(xdate.month) + "-" + str(xdate.day))
            xdate += one_day
            feqlist.append(getCountByDate(xdate, spostdate))
            x += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(llen) + ')', timeline, feqlist,
        "【" + authorname + '】的活跃程度图(' + str(begdate.date()) + "->" +
        str(enddate.date()) + ")")
    print('>>>>>图像加载完毕')
def showLastDays(word,days):
    """Plot how often ``word`` appears per day over the last ``days`` days.

    All posts after the start of the window are fetched from ``RFF``; a post
    counts when its content field (index 3) contains ``word``, and it is
    bucketed by the whole-day distance between its timestamp (index 4) and
    the start of the window. When the window is longer than 30 days only
    some slots get a "month-day" label (the others get an empty string), and
    the final slot is labelled with the full date.

    Args:
        word: Substring to look for in each post's content; also shown in
            the chart title.
        days: Window size in days, counted back from the latest timestamp in
            the data source.
    """
    print("获取数据集中的最近时间...")
    enddate = RFF.queryDatasourceLatestTime()
    print("计算时间区间...")
    begdate = enddate - datetime.timedelta(days=days)
    print('时间区间:',begdate,'->',enddate)
    print("获取回帖列表...")
    spostdate = RFF.queryWordContainListAfterTime(str(begdate))
    print("解析回帖数据...")

    # Build the x-axis labels; the counters are allocated afterwards, one
    # per label.
    one_day = datetime.timedelta(days=1)
    timeline = []
    xdate = begdate
    x = 0
    if days > 30:
        # Number of unlabeled slots that follow each labeled slot.
        blanks_per_label = days / 30 - 1
        while x <= days:
            timeline.append(str(xdate.month)+"-"+str(xdate.day))
            xdate += one_day
            x += 1

            blanks = 0
            while blanks < blanks_per_label and x <= days:
                timeline.append("")
                xdate += one_day
                x += 1
                blanks += 1

        # Fix: the original used ``==`` here, a comparison whose result was
        # discarded, so the last slot never received its full-date label.
        xdate -= one_day
        timeline[-1] = str(xdate.date())
    else:
        while x < days:
            timeline.append(str(xdate.month)+"-"+str(xdate.day))
            xdate += one_day
            x += 1

    feqlist = [0] * len(timeline)

    # Per the original comment each post is laid out as:
    # [thread link, forum name, author, content, post time, replied-to user,
    #  page]
    for post in spostdate:
        if word in post[3]:
            satpos = (post[4] - begdate).days
            # Fix: the original indexed ``feqlist[satpos-1]`` directly, so a
            # post from the first day (satpos == 0) wrapped around to index
            # -1 and was counted in the LAST bucket. Clamp to the first
            # bucket instead, and ignore anything past the end.
            slot = max(satpos - 1, 0)
            if slot < len(feqlist):
                feqlist[slot] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics('时间','出现次数(帖子/回帖总数:'+str(len(spostdate))+')',timeline,feqlist,"【"+ word +'】的时间频率图('+ str(begdate.date()) + "->" + str(enddate.date()) +")")
    print('>>>>>图像加载完毕')
def showLastYears(word,years):
    """Plot how often ``word`` appears per calendar year.

    The window starts ``years * 365`` days before the latest timestamp in
    the data source. All posts after that point are fetched from ``RFF``; a
    post counts when its content field (index 3) contains ``word``, and it
    is bucketed by the calendar year of its timestamp (index 4).

    The chart has one point per calendar year from the window's start year
    through the latest timestamp's year, so label ``i`` always names the
    year that bucket ``i`` counts.

    Args:
        word: Substring to look for in each post's content; also shown in
            the chart title.
        years: Window size in years (each treated as 365 days).
    """
    print("获取数据集中的最近时间...")
    enddate = RFF.queryDatasourceLatestTime()
    print("计算时间区间...")
    begdate = enddate - datetime.timedelta(days=years*365)
    print('时间区间:',begdate,'->',enddate)
    print("获取回帖列表...")
    spostdate = RFF.queryWordContainListAfterTime(str(begdate))
    print("解析回帖数据...")

    print("begdate=",begdate,"enddate=",enddate)

    # Fix: the original produced labels by repeatedly adding 365 days to the
    # start date while the buckets were indexed by calendar-year difference.
    # Across leap years the stepped date drifts, so a label could repeat or
    # name a different year than its bucket. Labels are now generated from
    # the same calendar-year range the buckets use.
    first_year = begdate.year
    timeline = []
    for year in range(first_year, enddate.year + 1):
        timeline.append(str(year)+"年")
        print(str(year))
    feqlist = [0] * len(timeline)

    # Per the original comment each post is laid out as:
    # [thread link, forum name, author, content, post time, replied-to user,
    #  page]
    for post in spostdate:
        if word in post[3]:
            slot = post[4].year - first_year
            # Ignore timestamps outside the plotted range rather than
            # raising IndexError or wrapping to a negative index.
            if 0 <= slot < len(feqlist):
                feqlist[slot] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics('时间','出现次数(帖子/回帖总数:'+str(len(spostdate))+')',timeline,feqlist,"【"+ word +'】的时间频率图('+ str(begdate.year) + "->" + str(enddate.year) +")")
    print('>>>>>图像加载完毕')


# This function is a helper used to find the time range.
# Returns: earliest time, latest time