Пример #1
0
def userActivityGenerate(startDate=''):
    """Generate random user-activity records for videos not yet processed.

    Selects every video whose ``useractivityflag`` is ``'N'``.  For each
    video, one batch of random activity lines is built per date returned by
    ``getDateRangeList(startDate, now, 1)`` and handed to ``flush2Local``;
    the video is then flagged ``'Y'``.  Processing stops after 100 videos.

    Args:
        startDate: Start of the date range.  When left empty, the previous
            hard-coded value ``"2015-08-01T00:00:00"`` is used, so existing
            zero-argument callers behave as before.
    """
    # The parameter used to be overwritten on every iteration; honour it now.
    if startDate == '':
        startDate = "2015-08-01T00:00:00"

    videoList = select(DB_NAME, DB_TB_VIDEO, ['id', 'channelid', 'categoryid'],
                       ['useractivityflag'], [{
                           'useractivityflag': 'N'
                       }])
    # NOTE(review): the batch number never changes within a call; the old
    # roll-over statements sat after `break` and could never execute.
    batchNum = 100
    count = 0
    for video in videoList:
        endDate = getTimestampNow()
        for dateValue in getDateRangeList(startDate, endDate, 1):
            dataSet = []
            for topic, value in topicRndSeedDict.items():
                # Floor division keeps the bound an integer; max() keeps
                # randint's range valid when the seed is smaller than 3.
                eventCount = randint(1, max(1, value // 3)) * randint(1, value)
                for dateTime in generateRandomTimeStr(dateValue, eventCount):
                    dataSet.append(
                        userActivityRandom(topic,
                                           vid=video[0],
                                           cid=video[1],
                                           caid=video[2],
                                           dateStr=dateTime))
            flush2Local(batchNum, ''.join(dataSet))
        count = count + 1
        # Single pre-formatted argument prints the same text as before.
        print('---- %s' % count)
        update(DB_NAME, DB_TB_VIDEO, ['useractivityflag'], ['id'],
               [{
                   'useractivityflag': 'Y',
                   'id': video[0]
               }])
        if count > 99:
            break
Пример #2
0
def userActivityRandom(topic, vid='', cid='', caid='', dateStr=''):
    """Build one space-separated activity line terminated by a newline.

    Field order is: timestamp, category id, channel id, video id, topic.
    Example: ``2015-09-30T16:40:00Z category channel video userview``.

    Any id left as ``''`` is replaced by a random placeholder
    (``ca_rnd<1..20>``, ``ch_rnd<1..10000>``, ``v_rnd<1..100000>``); an empty
    ``dateStr`` is replaced by ``getTimestampNow()``.
    """
    timestamp = dateStr
    if timestamp == '':
        timestamp = getTimestampNow()

    # Random fallbacks are drawn in category, channel, video order.
    category = caid
    if category == '':
        category = 'ca_rnd' + str(randint(1, 20))

    channel = cid
    if channel == '':
        channel = 'ch_rnd' + str(randint(1, 10000))

    video = vid
    if video == '':
        video = 'v_rnd' + str(randint(1, 100000))

    fields = [timestamp, category, channel, video, topic]
    return ' '.join(fields) + '\n'
Пример #3
0
def video_search():
    """Serve the video page: keyword search or statistics for one video.

    When the form carries ``videokeyword``, a search is run (an empty keyword
    falls back to ``'youtube'``) and ``video.html`` is rendered with the
    parsed results.  Otherwise the form fields ``videoinfo``, ``mode``,
    ``activitytype`` and ``datetimerange`` are read and ``videostat.html`` is
    rendered with the per-period series, the accumulated series and their
    total.
    """
    form = request.form

    if 'videokeyword' in form:
        searchTerm = form["videokeyword"].strip()
        if searchTerm == '':
            searchTerm = 'youtube'
        query = str(urllib.urlencode({"q": searchTerm, 'type': 'video'}))
        searchJSON = getJSONData('search',
                                 query,
                                 part='snippet',
                                 maxResults=True)
        return render_template("video.html",
                               videoList=parseSearchJSON(searchJSON,
                                                         'videoId'))

    # The title is whatever follows the last ':' of the first "videoinfo".
    selected = form.getlist("videoinfo")[0]
    videoTitle = selected.rpartition(':')[2]
    mode = form["mode"]
    useractivity = str(form["activitytype"])
    videoStatCount = int(form['datetimerange'])
    startDate = getTimestampNow()

    hourly = mode == '_hourly'
    if hourly:
        videoStatCount = videoStatCount * 2
        datetimeRangeList = getDatetimeFromStartList(count=videoStatCount)
    else:
        endDate = getDateFromStart(startDateStr=startDate,
                                   offset=videoStatCount,
                                   ago=True)
        datetimeRangeList = getDateRangeList(startDate, endDate, offset=1)

    # NOTE(review): this lookup's result is discarded; only the random
    # series fetched next is displayed.  Confirm whether that is intended.
    getVideoById(videoId=getRandomVideoId(),
                 videoStatCount=videoStatCount,
                 useractivity=useractivity,
                 mode=mode)
    series = getRandomValueList(count=int(videoStatCount),
                                useractivity=useractivity,
                                mode=mode)

    videoDictList = [{'name': videoTitle, 'data': series[0]}]
    videoDictAccumList = [{'name': videoTitle, 'data': series[1]}]

    # Total is the difference between the last and first accumulated values.
    accumulated = videoDictAccumList[0]['data']
    total = accumulated[-1] - accumulated[0]

    modeLabel = 'hours' if hourly else 'days'

    # Insert a space after the leading "user" (4 characters) for display.
    prefixLen = len('user')
    activityLabel = useractivity[:prefixLen] + ' ' + useractivity[prefixLen:]

    return render_template('videostat.html',
                           videoTitle=videoTitle,
                           videoDictList=videoDictList,
                           videoDictAccumList=videoDictAccumList,
                           datetimeRangeList=datetimeRangeList,
                           useractivity=activityLabel,
                           datetimerange=form['datetimerange'],
                           mode=modeLabel,
                           total=total)
Пример #4
0
def saveVIdByChannelActivity(channelId, ALL=False):
    """Insert the channel's video ids and stamp its ``activityDate``.

    The ids come from ``getVIdByChannelActivity(channelId, ALL)``.  When any
    are returned, a single multi-row ``insert`` is built, printed and
    executed.  The channel's ``activityDate`` is then set to the current
    timestamp whether or not anything was inserted.
    """
    idSet = getVIdByChannelActivity(channelId, ALL)
    remaining = len(idSet)
    if remaining > 0:
        # NOTE(review): ids and channelId are spliced directly into the SQL
        # text; switch to bound parameters if execute_query supports them.
        rows = []
        while remaining > 0:
            rows.append("('" + idSet.pop() + "','" + channelId + "')")
            remaining -= 1
        insertQ = ("insert into " + DB_NAME + "." + DB_TB_VIDEO +
                   " (id, channelid) values " + ",".join(rows) + ";")
        print(insertQ)
        execute_query(insertQ)
    update(DB_NAME, DB_TB_CHANNEL, ['activityDate'], ['id'],
           [{
               'id': channelId,
               'activityDate': getTimestampNow()
           }])
Пример #5
0
def getVIdByChannelActivity(channelId, ALL=False):
    """Return the channel's video ids from a start date up to now.

    The channel row supplies two candidate start dates, ``publishedAt`` and
    ``activityDate``.  ``publishedAt`` is used when ``ALL`` is true or when
    no ``activityDate`` is stored; otherwise ``activityDate`` is used.

    Args:
        channelId: Id of the channel to look up.
        ALL: When true, always start from ``publishedAt``.

    Returns:
        The result of ``getVIdByChannelActivityDate`` for the chosen range,
        or an empty list when the channel is not found.
    """
    channel = select(DB_NAME, DB_TB_CHANNEL, ["publishedAt", "activityDate"],
                     ['id'], [{
                         'id': channelId
                     }])
    if not channel:
        return []

    row = channel[0]
    # Truthiness also covers a NULL/None activityDate, where len() would
    # raise TypeError.
    if ALL or not row[1]:
        dateStr = row[0]
    else:
        dateStr = row[1]
    return getVIdByChannelActivityDate(channelId, dateStr,
                                       getTimestampNow())
Пример #6
0
def flush2HDFS(dataSet, dateStr=''):
    """Append ``dataSet`` to a local per-date file and upload it to HDFS.

    The local file name is the parsed date plus ``FILE_TYPE`` under
    ``LOCAL_TEMP_PATH``.  The file is uploaded with ``hdfs dfs -put -f``
    into ``HDFS_DEFAULT_PATH``.

    Args:
        dataSet: Text to append to the local file.
        dateStr: Date used to name the file; when it parses to ``""`` the
            current timestamp is used instead.
    """
    dateStr = parseDateString(dateStr)
    if dateStr == "":
        dateStr = parseDateString(getTimestampNow())
    localPath = LOCAL_TEMP_PATH + "/"
    localFilePath = localPath + "/" + str(dateStr) + FILE_TYPE
    hdfsPath = HDFS_DEFAULT_PATH + '/'
    if not os.path.exists(localPath):
        os.system('sudo mkdir ' + localPath)
    if not os.path.exists(localFilePath):
        os.mknod(localFilePath)
    else:
        # NOTE(review): this removes the HDFS directory path, not the file
        # about to be uploaded - confirm that is the intent.
        os.system("hdfs dfs -rm %s " % (hdfsPath))
    # Close (and so flush) the file before uploading; previously the upload
    # ran while the data could still be sitting in the write buffer.
    with open(localFilePath, "a") as localFile:  # append mode
        localFile.write(dataSet)
    os.system("hdfs dfs -put -f %s %s" % (localFilePath, hdfsPath))
0
def channel_search():
    """Serve the channel page: keyword search or top-N video statistics.

    When the form carries ``channelkeyword``, a channel search is run (an
    empty keyword falls back to ``'youtube'``) and ``channel.html`` is
    rendered.  Otherwise the form fields ``topn``, ``channelinfo``,
    ``activitytype`` and ``daterange`` are read and ``channelvideo.html`` is
    rendered with per-video series, accumulated series and a
    ``[name, value]`` weight list.
    """
    form = request.form

    if 'channelkeyword' in form:
        searchTerm = form["channelkeyword"].strip()
        if searchTerm == '':
            searchTerm = 'youtube'
        query = str(urllib.urlencode({"q": searchTerm, 'type': 'channel'}))
        searchJSON = getJSONData('search', query, part='snippet',
                                 maxResults=True)
        return render_template("channel.html",
                               channelList=parseSearchJSON(searchJSON,
                                                           'channelId'))

    topn = int(form["topn"])
    # "channelinfo" is split at its last ':' into id and title.
    channelInfo = form["channelinfo"]
    sepIndex = channelInfo.rfind(':')
    channelId = channelInfo[:sepIndex]
    channelTitle = channelInfo[sepIndex + 1:]

    startDate = getTimestampNow()
    useractivity = form["activitytype"]
    dayCount = int(form["daterange"])
    endDate = str(getDateFromStart(str(startDate), dayCount, True)) + 'T'
    dateRangeList = getDateRangeList(startDate, endDate, offset=1)

    # NOTE(review): statistics are scanned for a random channel id rather
    # than for channelId - presumably demo data; confirm.
    stats = scanVideoByChannel(channelid=getRandomChannelID(), topn=topn,
                               useractivity=useractivity, mode='_daily',
                               dateRangeList=dateRangeList)

    # Insert a space after the leading "user" (4 characters) for display.
    prefixLen = len('user')
    activityLabel = useractivity[:prefixLen] + ' ' + useractivity[prefixLen:]

    videoQuery = str(urllib.urlencode({"channelId": channelId,
                                       'type': 'video'}))
    videoJSON = getJSONData('search', videoQuery, part='snippet',
                            maxResults=True)
    videoList = parseSearchJSON(videoJSON, 'videoId')[0:topn]
    videoDictList = jsonifyVideo(videoList=videoList, dataList=stats[0])
    videoDictAccumList = jsonifyVideo(videoList=videoList, dataList=stats[1])

    # Weight of a video = last accumulated value minus the first one.
    videoWeightList = []
    for entry in videoDictAccumList:
        accumulated = entry['data']
        gain = accumulated[-1] - accumulated[0]
        label = entry['name']
        if len(label) > 31:
            label = label[0:30] + '...'
        videoWeightList.append([label, gain])

    return render_template('channelvideo.html',
                           channelTitle=channelTitle,
                           useractivity=activityLabel,
                           topn=topn,
                           daterange=form["daterange"],
                           dateRangeList=dateRangeList,
                           videoDictList=videoDictList,
                           videoDictAccumList=videoDictAccumList,
                           videoWeightList=videoWeightList)