def userActivityGenerate(startDate=''):
    """Generate random user-activity records for videos not yet processed.

    Selects every video whose ``useractivityflag`` is ``'N'``. For each
    video, walks the list returned by ``getDateRangeList`` from the start
    date up to the current timestamp, builds random activity lines for every
    topic in ``topicRndSeedDict`` and passes each date's lines to
    ``flush2Local``. The video is then flagged ``'Y'``. Processing stops
    after 100 videos per call.

    Args:
        startDate: First date to generate data for. When left empty the
            historical default "2015-08-01T00:00:00" is used.
    """
    # Honour the caller's start date instead of unconditionally overwriting it.
    rangeStart = startDate if startDate != '' else "2015-08-01T00:00:00"

    videoList = select(DB_NAME, DB_TB_VIDEO,
                       ['id', 'channelid', 'categoryid'],
                       ['useractivityflag'],
                       [{'useractivityflag': 'N'}])
    # NOTE(review): the batch number never changes; the statements that
    # incremented it followed the `break` and could not run. Confirm whether
    # batch rotation was intended instead of stopping after 100 videos.
    batchNum = 100
    count = 0
    for video in videoList:
        endDate = getTimestampNow()
        for dateValue in getDateRangeList(rangeStart, endDate, 1):
            dataSet = []
            for topic, value in topicRndSeedDict.items():
                # max(1, ...) keeps randint's range non-empty for seeds < 3.
                eventCount = randint(1, max(1, value // 3)) * randint(1, value)
                for dateTime in generateRandomTimeStr(dateValue, eventCount):
                    dataSet.append(
                        userActivityRandom(topic,
                                           vid=video[0],
                                           cid=video[1],
                                           caid=video[2],
                                           dateStr=dateTime))
            # One flush per date keeps the in-memory buffer small.
            flush2Local(batchNum, ''.join(dataSet))
        count += 1
        print('---- %s' % count)
        update(DB_NAME, DB_TB_VIDEO, ['useractivityflag'], ['id'],
               [{'useractivityflag': 'Y', 'id': video[0]}])
        if count > 99:
            break
def userActivityRandom(topic, vid='', cid='', caid='', dateStr=''):
    """Build one space-separated user-activity record ending in a newline.

    Field order is: timestamp, category id, channel id, video id, topic.
    Sample output: 2015-09-30T16:40:00Z category channel video userview

    Args:
        topic: Activity topic written as the last field.
        vid: Video id; when '' a placeholder 'v_rnd<1..100000>' is used.
        cid: Channel id; when '' a placeholder 'ch_rnd<1..10000>' is used.
        caid: Category id; when '' a placeholder 'ca_rnd<1..20>' is used.
        dateStr: Timestamp; when '' ``getTimestampNow()`` is used.

    Returns:
        The record as a single line of text terminated by '\\n'.
    """
    fields = [
        dateStr if dateStr != '' else getTimestampNow(),
        caid if caid != '' else 'ca_rnd' + str(randint(1, 20)),
        cid if cid != '' else 'ch_rnd' + str(randint(1, 10000)),
        vid if vid != '' else 'v_rnd' + str(randint(1, 100000)),
        topic,
    ]
    # Render every field as text first so non-string ids (e.g. an integer
    # category id read from the database) do not make join() raise.
    return ' '.join('%s' % (field,) for field in fields) + '\n'
def video_search():
    """Serve the video search page and the per-video statistics page.

    When the form carries ``videokeyword`` the keyword (default 'youtube')
    is sent to the search endpoint and the hits are rendered with
    ``video.html``. Otherwise the form is expected to carry ``videoinfo``,
    ``mode``, ``activitytype`` and ``datetimerange``; a value series is
    produced for the requested range and rendered with ``videostat.html``.

    Returns:
        The rendered template.
    """
    if 'videokeyword' in request.form:
        keyword = request.form["videokeyword"].strip() or 'youtube'
        Filter = str(urllib.urlencode({"q": keyword, 'type': 'video'}))
        videoJSON = getJSONData('search', Filter, part='snippet',
                                maxResults=True)
        videoDataList = parseSearchJSON(videoJSON, 'videoId')
        return render_template("video.html", videoList=videoDataList)

    # Indexing the form (rather than getlist(...)[0]) matches channel_search
    # and lets Flask reject a missing field as a bad request instead of
    # failing with IndexError.
    videoInfo = request.form["videoinfo"]
    # The title is everything after the last ':' (the whole value if none).
    videoTitle = videoInfo.rpartition(':')[2]
    mode = request.form["mode"]
    useractivity = str(request.form["activitytype"])
    videoStatCount = int(request.form['datetimerange'])
    startDate = getTimestampNow()

    if mode == '_hourly':
        videoStatCount = videoStatCount * 2
        datetimeRangeList = getDatetimeFromStartList(count=videoStatCount)
    else:
        endDate = getDateFromStart(startDateStr=startDate,
                                   offset=videoStatCount, ago=True)
        datetimeRangeList = getDateRangeList(startDate, endDate, offset=1)

    # NOTE(review): the result of this lookup is discarded and replaced by
    # random values on the next statement. The call is kept in case it has
    # side effects -- confirm whether it can be removed.
    getVideoById(videoId=getRandomVideoId(), videoStatCount=videoStatCount,
                 useractivity=useractivity, mode=mode)
    resultTuple = getRandomValueList(count=int(videoStatCount),
                                     useractivity=useractivity, mode=mode)

    videoDictList = [{'name': videoTitle, 'data': resultTuple[0]}]
    videoDictAccumList = [{'name': videoTitle, 'data': resultTuple[1]}]

    # Growth over the range = last accumulated value - first one; an empty
    # series (e.g. a range of 0) yields 0 instead of raising IndexError.
    accumulated = resultTuple[1]
    total = accumulated[-1] - accumulated[0] if len(accumulated) > 0 else 0

    mode = 'hours' if mode == '_hourly' else 'days'
    # Insert a space after the 'user' prefix, e.g. 'userview' -> 'user view'.
    prefixLen = len('user')
    useractivity = useractivity[:prefixLen] + ' ' + useractivity[prefixLen:]

    return render_template('videostat.html',
                           videoTitle=videoTitle,
                           videoDictList=videoDictList,
                           videoDictAccumList=videoDictAccumList,
                           datetimeRangeList=datetimeRangeList,
                           useractivity=useractivity,
                           datetimerange=request.form['datetimerange'],
                           mode=mode,
                           total=total)
def saveVIdByChannelActivity(channelId, ALL=False):
    """Store the video ids found for a channel and stamp its activity date.

    Fetches ids through ``getVIdByChannelActivity``, inserts them into the
    video table with one multi-row INSERT, then sets the channel's
    ``activityDate`` to the current timestamp.

    Args:
        channelId: Id of the channel whose videos are saved.
        ALL: Forwarded unchanged to ``getVIdByChannelActivity``.
    """
    idSet = getVIdByChannelActivity(channelId, ALL)
    if len(idSet) > 0:
        # Build all value tuples in one pass instead of growing the string
        # piece by piece while popping ids off the collection.
        # NOTE(review): values are interpolated into the SQL text unescaped;
        # switch to a parameterized query if execute_query supports one.
        rows = ",".join("('%s','%s')" % (videoId, channelId)
                        for videoId in idSet)
        insertQ = ("insert into " + DB_NAME + "." + DB_TB_VIDEO +
                   " (id, channelid) values " + rows + ";")
        print(insertQ)
        execute_query(insertQ)
    # NOTE(review): the activity date is stamped even when no ids were
    # found -- confirm this matches the intended behaviour.
    update(DB_NAME, DB_TB_CHANNEL, ['activityDate'], ['id'],
           [{'id': channelId, 'activityDate': getTimestampNow()}])
def getVIdByChannelActivity(channelId, ALL=False):
    """Return the video ids of a channel from a start date up to now.

    The start date is the channel's ``publishedAt`` when ``ALL`` is true or
    no ``activityDate`` has been recorded yet; otherwise it is the recorded
    ``activityDate``.

    Args:
        channelId: Id of the channel to look up.
        ALL: When true, always start from the channel's ``publishedAt``.

    Returns:
        Whatever ``getVIdByChannelActivityDate`` returns for the chosen
        range, or an empty list when the channel is not in the table.
    """
    channel = select(DB_NAME, DB_TB_CHANNEL,
                     ["publishedAt", "activityDate"], ['id'],
                     [{'id': channelId}])
    if len(channel) == 0:
        return []

    publishedAt, activityDate = channel[0][0], channel[0][1]
    # `not activityDate` covers both '' and a NULL/None column value;
    # calling len() on None would raise TypeError.
    dateStr = publishedAt if (ALL or not activityDate) else activityDate
    return getVIdByChannelActivityDate(channelId, dateStr, getTimestampNow())
def flush2HDFS(dataSet, dateStr=''):
    """Append data to a local per-date file and upload that file to HDFS.

    The local file lives under ``LOCAL_TEMP_PATH`` and is named after the
    parsed date plus ``FILE_TYPE``. After the data is appended the file is
    pushed to ``HDFS_DEFAULT_PATH`` with ``hdfs dfs -put -f``.

    Args:
        dataSet: Text to append to the local file.
        dateStr: Date used for the file name; when it parses to '' the
            current timestamp is used instead.
    """
    dateStr = parseDateString(dateStr)
    if dateStr == "":
        dateStr = parseDateString(getTimestampNow())

    localPath = LOCAL_TEMP_PATH + "/"
    localFilePath = localPath + str(dateStr) + FILE_TYPE
    hdfsPath = HDFS_DEFAULT_PATH + '/'

    if not os.path.exists(localPath):
        os.system('sudo mkdir ' + localPath)
    if not os.path.exists(localFilePath):
        os.mknod(localFilePath)
    else:
        # NOTE(review): this targets the HDFS directory path, not the file
        # being replaced, and `-put -f` below already overwrites -- confirm
        # whether this command is still needed.
        os.system("hdfs dfs -rm %s " % (hdfsPath))

    # Close (and thereby flush) the file before uploading; otherwise the
    # upload can run while the new data is still in the write buffer.
    with open(localFilePath, "a") as localFile:  # append mode
        localFile.write(dataSet)
    os.system("hdfs dfs -put -f %s %s" % (localFilePath, hdfsPath))
def channel_search():
    """Serve the channel search page and the per-channel video statistics.

    When the form carries ``channelkeyword`` the keyword (default 'youtube')
    is sent to the search endpoint and the hits are rendered with
    ``channel.html``. Otherwise the form is expected to carry ``topn``,
    ``channelinfo`` ("<channel id>:<title>"), ``activitytype`` and
    ``daterange``; statistics for the channel's first ``topn`` videos are
    rendered with ``channelvideo.html``.

    Returns:
        The rendered template.
    """
    if 'channelkeyword' in request.form:
        keyword = request.form["channelkeyword"].strip() or 'youtube'
        Filter = str(urllib.urlencode({"q": keyword, 'type': 'channel'}))
        channelJSON = getJSONData('search', Filter, part='snippet',
                                  maxResults=True)
        channelDataList = parseSearchJSON(channelJSON, 'channelId')
        return render_template("channel.html", channelList=channelDataList)

    topn = int(request.form["topn"])
    channelInfo = request.form["channelinfo"]
    # Split on the last ':'; without a separator keep the whole value as the
    # id instead of silently dropping its last character.
    channelId, separator, channelTitle = channelInfo.rpartition(':')
    if not separator:
        channelId = channelInfo

    startDate = getTimestampNow()
    useractivity = request.form["activitytype"]
    endDate = str(getDateFromStart(str(startDate),
                                   int(request.form["daterange"]),
                                   True)) + 'T'
    dateRangeList = getDateRangeList(startDate, endDate, offset=1)

    # NOTE(review): statistics are scanned for a random channel id, not the
    # selected one -- confirm this is intentional.
    resultTuple = scanVideoByChannel(channelid=getRandomChannelID(),
                                     topn=topn,
                                     useractivity=useractivity,
                                     mode='_daily',
                                     dateRangeList=dateRangeList)

    # Insert a space after the 'user' prefix, e.g. 'userview' -> 'user view'.
    prefixLen = len('user')
    useractivity = useractivity[:prefixLen] + ' ' + useractivity[prefixLen:]

    Filter = str(urllib.urlencode({"channelId": channelId, 'type': 'video'}))
    videoJSON = getJSONData('search', Filter, part='snippet', maxResults=True)
    videoList = parseSearchJSON(videoJSON, 'videoId')[0:topn]

    videoDictList = jsonifyVideo(videoList=videoList, dataList=resultTuple[0])
    videoDictAccumList = jsonifyVideo(videoList=videoList,
                                      dataList=resultTuple[1])

    videoWeightList = []
    for videoDict in videoDictAccumList:
        valueList = videoDict['data']
        # Growth over the range = last accumulated value - first one; an
        # empty series counts as 0 instead of raising IndexError.
        value = valueList[-1] - valueList[0] if len(valueList) > 0 else 0
        name = videoDict['name']
        if len(name) > 31:
            name = name[0:30] + '...'
        videoWeightList.append([name, value])

    return render_template('channelvideo.html',
                           channelTitle=channelTitle,
                           useractivity=useractivity,
                           topn=topn,
                           daterange=request.form["daterange"],
                           dateRangeList=dateRangeList,
                           videoDictList=videoDictList,
                           videoDictAccumList=videoDictAccumList,
                           videoWeightList=videoWeightList)