# Decide whether `username` is usable as a Steam username.
# A falsy value (None, '') is rejected immediately with False. Otherwise the
# User datastore is queried on `steam_username`, and the last visible check
# is that 'Error' does not appear in the <title> of a parsed page (`page_soup`).
# NOTE(review): presumably an existing User short-circuits to True and
# `page_soup` is the user's Steam page -- confirm against the full body.
def is_valid_username(username): if not username: return False # First check if it's an existing user in the db if User.all().filter('steam_username ='******'Error' not in page_soup.find('title').string
def run(self, screenshot_page_url):
    """Scrape one Steam screenshot page and store it as a Screenshot.

    Returns early (after logging) when no user is attached, when the URL
    carries no ``id`` query parameter, when a Screenshot with that id is
    already stored, or when ``helpers.request_soup`` returns None.

    Args:
        screenshot_page_url: URL of the screenshot page. The first value of
            its ``id`` query parameter is used as the screenshot id.

    Raises:
        db.TransactionFailedError: logged, then re-raised, when the
            datastore write fails.
    """
    if self.user is None:
        logging.warning('Attempting to get screenshot for an empty user')
        return

    # First check if screenshot exists in db already
    query_args = urlparse.parse_qs(urlparse.urlparse(screenshot_page_url).query)
    id_values = query_args.get('id')
    if not id_values:
        # Without an id there is nothing to de-duplicate on or to store.
        logging.warning('Screenshot URL has no id parameter: %s',
                        screenshot_page_url)
        return
    screenshot_id = id_values[0]

    existing = Screenshot.all().filter('screenshot_id =', screenshot_id).get()
    if existing is not None:
        logging.debug('Screenshot already exists in database: %s',
                      screenshot_id)
        return

    # Then check if we got 200 from Steam
    page_soup = helpers.request_soup(screenshot_page_url)
    if page_soup is None:
        return
    logging.info('Retrieved screenshot: %s', screenshot_page_url)

    # TODO Smarter check for spoiler tag
    #
    # Apparently the spoiler html element is only on the desktop version
    # so I probably have to schedule a separate task just to search the
    # desktop listings for potential spoilers
    screenshot_src = page_soup.find('img', class_='userScreenshotImg')['src']

    # The caption is optional: the tag may be missing, and BeautifulSoup's
    # .string may be None (e.g. an empty caption), so fall back to ''.
    caption_tag = page_soup.find('h1', class_='captionText')
    caption_text = caption_tag.string if caption_tag is not None else None
    screenshot_desc = caption_text.strip() if caption_text else ''

    desc_lower = screenshot_desc.lower()
    screenshot_is_spoiler = 'spoiler' in desc_lower
    screenshot_is_nsfw = 'nsfw' in desc_lower

    game_link = page_soup.find(id='gameName').find('a', class_='itemLink')
    screenshot_game = game_link.string.strip()

    screenshot = Screenshot(
        parent=self.user,
        screenshot_id=screenshot_id,
        url=screenshot_page_url,
        src=screenshot_src,
        desc=screenshot_desc,
        game=screenshot_game,
        is_spoiler=screenshot_is_spoiler,
        is_nsfw=screenshot_is_nsfw,
    )

    # put() signals failure by raising, not by returning the exception
    # class, so the failure has to be caught rather than compared against.
    try:
        screenshot.put()
    except db.TransactionFailedError:
        logging.error('Failed to save screenshot for %s id:%s',
                      self.user.steam_username, screenshot_id)
        raise  # keep the failure visible to the caller, as before
    logging.debug('Successfully saved screenshot for %s id:%s',
                  self.user.steam_username, screenshot_id)
def run(self, current_page_url):
    """Process one page of a user's screenshot listing.

    Queues one '/scraper/screenshot' task for every ``a.userScreenshotLink``
    on the page. If the page has an ``img.pagingRightArrowImg``, a
    '/scraper/listing' task is queued for the href of that image's parent;
    otherwise the user entity is saved and ``self.user`` is cleared.

    Returns early when no user is attached or when
    ``helpers.request_soup`` returns None.

    Args:
        current_page_url: URL of the listing page to process.
    """
    if self.user is None:
        logging.warning('Attempting to get listing for an empty user')
        return

    steam_username = self.user.steam_username
    last_scraped = self.user.last_scraped
    # A user that has no timestamp yet must not abort the scrape from
    # inside a log call.
    if last_scraped is not None:
        last_scraped_text = last_scraped.strftime('%Y-%m-%d %H:%M:%S.%f')
    else:
        last_scraped_text = 'never'
    # NOTE(review): the value logged after 'steam_username:' is taken to be
    # self.user.steam_username -- confirm.
    logging.info("Processing '%s' steam_username:%s last_scraped:%s",
                 current_page_url, steam_username, last_scraped_text)

    page_soup = helpers.request_soup(current_page_url)
    # Check if we got 200 from Steam
    if page_soup is None:
        return

    # Iterate over this page's screenshots
    for link in page_soup.find_all('a', class_='userScreenshotLink'):
        screenshot_queue.add(taskqueue.Task(
            url='/scraper/screenshot',
            params={
                HEADER_STEAM_USERNAME_KEY: steam_username,
                HEADER_NEXT_PAGE_KEY: self.fix_url(link['href']),
            },
        ))

    # Queue up next page or update the user's last_scraped time
    next_page_arrow = page_soup.find('img', class_='pagingRightArrowImg')
    if next_page_arrow is None:
        # Last page of the listing: persist the user and drop the reference.
        self.user.put()
        self.user = None  # Cleanup
        return

    listing_queue.add(taskqueue.Task(
        url='/scraper/listing',
        params={
            HEADER_STEAM_USERNAME_KEY: steam_username,
            HEADER_NEXT_PAGE_KEY: self.fix_url(next_page_arrow.parent['href']),
        },
    ))