def get(self):
    """Record a new view-count sample for every flagged video.

    Queries the datastore for ``VideoData`` entities whose ``checkMeFlag``
    is True, stores one ``VideoViewsData`` entity per video (all stamped
    with the same start-of-run time), then clears the flag on the video
    and saves it.
    """
    from models import VideoData, VideoViewsData

    # A single timestamp is shared by every sample written in this run.
    timestamp = datetime.datetime.now()

    # Only videos that have been flagged are sampled.
    # NOTE (from the original author): the flag value in this query must
    # be True when deploying.
    flagged = VideoData.gql("WHERE checkMeFlag = True")
    logging.info('Checking %i videos', flagged.count())

    for flagged_video in flagged:
        # Current number of views for this video's token.
        current_views = self.getEntryData(flagged_video.token)

        VideoViewsData(
            video=flagged_video,
            dateTime=timestamp,
            views=current_views,
            collection_name="views",
        ).put()

        # The video has been sampled; unflag it and persist the change.
        flagged_video.checkMeFlag = False
        flagged_video.put()
def searchThis(self, search):
    """Run a search on YouTube and store every scraped result.

    Opens http://www.youtube.com with a mechanize browser, submits
    ``search.queryText`` through the page's second form, follows one of
    the links on the results page to re-sort the results, and scrapes
    the resulting HTML.  For each result a ``VideoData`` entity, an
    initial ``VideoViewsData`` sample and a ``VideoSearchIndex`` entry
    pointing back at ``search`` are written to the datastore.

    Args:
        search: the search entity; this method reads ``queryText``,
            calls ``key()`` and sets ``lastQuery``.

    Raises:
        IndexError: if the results page exposes fewer than 17 links
            (the re-sort link is picked by fixed position).
    """
    from models import VideoData, VideoViewsData, VideoSearchIndex

    # NOTE(review): lastQuery is only assigned here, never put();
    # presumably the caller saves ``search`` - confirm.
    search.lastQuery = datetime.datetime.now()

    br = gaemechanize.Browser()

    # Browser options
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)

    # User-Agent (this is cheating, ok?)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    # The site we will navigate into, handling its session
    br.open('http://www.youtube.com')

    # The search box is the second form on the front page (nr is 0-based).
    br.select_form(nr=1)

    # Executes the query with the given search text
    br.form['search_query'] = search.queryText
    br.submit()

    # All links on the results page
    search_links = list(br.links())

    # Selects By Upload Rate (it's a hack now, needs to be context
    # independent): the sort link is addressed by its fixed position.
    br.follow_link(search_links[16])
    html = br.response().read()
    soup = BeautifulSoup(html)

    # One <div> per search result
    search_results = soup.findAll('div', attrs={'class': "result-item *sr "})

    # Store in DB
    for result in search_results:
        # Scrape once and reuse for both the token and the stored JSON.
        video_info = self.scrapeVideoInfo(result)

        # Strip the token from the youtube url.  31 is the length of
        # 'http://www.youtube.com/watch?v='; presumably the url starts
        # with that prefix followed by an 11-character token - confirm
        # against scrapeVideoInfo.
        vidtoken = video_info['url'][31:42]

        # The token doubles as the entity's key name.
        # TODO: skip videos that already exist; the original carried a
        # commented-out `VideoData.get(new_video.key()) is None` check.
        new_video = VideoData(key_name=vidtoken)
        new_video.token = vidtoken
        new_video.json = simplejson.dumps(video_info)

        # Initial views sample scraped from the same result node
        viewsDate, views = self.scrapeVideoViews(result)
        views_object = VideoViewsData(dateTime=viewsDate, views=views, video=new_video)
        views_object.put()

        # Index entry (child of the video) recording which search found it
        new_video_searchlist = VideoSearchIndex(key_name=new_video.token, parent=new_video)
        new_video_searchlist.searchTerms.append(search.key())
        new_video_searchlist.put()

        new_video.alertLevel = "initial"
        new_video.checkMeFlag = False
        new_video.put()