def record_heatmaps(mongodb): """ Record heatmap bins for each video, based on segments for a single video? """ start_time = time.time() # TODO: handle cut segments (i.e., start event exists but end event missing) # TODO: only remove the corresponding entries in the database: (video, user) collection = mongodb['video_segments'] segments = list(collection.find()) collection = mongodb['video_heatmaps'] collection.remove() print len(segments), "segments found" results = defaultdict(dict) for segment in segments: if not segment["user_id"] in results[segment["video_id"]]: results[segment["video_id"]][segment["user_id"]] = [] results[segment["video_id"]][segment["user_id"]].append(segment) vid_col = mongodb['videos'] for video_id in results: result = list(vid_col.find({"video_id": video_id})) if len(result): process_heatmaps(mongodb, results[video_id], video_id, result[0]["duration"]) else: print "ERROR in video information retrieval" # Make sure the collection is indexed. from pymongo import ASCENDING collection.ensure_index([("video_id", ASCENDING)]) # [("video_id", ASCENDING), ("time", ASCENDING)]) print sys._getframe().f_code.co_name, "COMPLETED", (time.time() - start_time), "seconds"
def record_heatmaps_ajax(mongodb, index): """ Record heatmap bins for each video, based on segments for a single video? """ bin_size = 100000 start_time = time.time() collection = mongodb[HEATMAPS_COL] collection.remove() # TODO: handle cut segments (i.e., start event exists but end event missing) # TODO: only remove the corresponding entries in the database: (video, user) vid_col = mongodb['videos'] video_list = list(vid_col.find()) num_videos = len(video_list) for index, video in enumerate(video_list): video_id = video["video_id"] loop_start_time = time.time() collection = mongodb[SEGMENTS_COL] segments = list(collection.find({"video_id": video_id})) #segments = collection.find().limit(bin_size).skip(index*bin_size) #.batch_size(1000) print index, "/", num_videos, video_id, ":", len(segments), "segments", (time.time() - loop_start_time), "seconds" if len(segments): loop_start_time2 = time.time() results = defaultdict(dict) for segment in segments: if not segment["user_id"] in results[segment["video_id"]]: results[segment["video_id"]][segment["user_id"]] = [] results[segment["video_id"]][segment["user_id"]].append(segment) process_heatmaps(mongodb, results[video_id], video_id, video["duration"]) print (time.time() - loop_start_time2), "seconds" # Make sure the collection is indexed. from pymongo import ASCENDING collection.ensure_index([("video_id", ASCENDING)]) # [("video_id", ASCENDING), ("time", ASCENDING)]) print sys._getframe().f_code.co_name, "COMPLETED", (time.time() - start_time), "seconds"