Пример #1
0
    def get_series_info(self):
        """
        Scrape the series-level information for a TVShow and store it in the GAE Datastore.

        Unless TVShow scraping is disabled, the show title, description, IMDB rating, genres,
        status, IMDB id, slug and number of seasons are read from ``self.tvdbsoup`` and written
        to a ``TVShow`` entity keyed by ``self.tvdb_id``. When TVShow scraping is disabled the
        existing entity is looked up instead.

        Unless fanart scraping is disabled, a hexagon image is then generated from the fanart URL
        and stored as a ``HexImages`` entity whose parent is ``self.series_key``.

        Side effects: sets ``self.series_key``; on a first scrape of a show with more than 10
        seasons it also sets ``self.disable_episode_ratings`` and ``self.options_array[1]`` to 1.
        """
        fanart_url = None

        if not self.disbable_tvshow_scraping:
            # On a rescrape keep the fanart that is already stored for the show. If the stored
            # entity cannot be found, fall back to deriving the URL from the feed as on a
            # first scrape rather than failing on a missing entity.
            existing_show = TVShow.get_by_key_name(self.tvdb_id) if self.rescrape else None
            if existing_show is not None:
                fanart_url = existing_show.fanart
            else:
                # The <fanart> element may be absent altogether, not merely empty
                fanart_tag = self.tvdbsoup.fanart
                if fanart_tag is not None and fanart_tag.text:
                    fanart_url = TVDB_BANNER_URL + fanart_tag.text

            # Identify the show genres. The field is pipe-delimited; dropping empty segments
            # means a show without genres stores None instead of an empty string.
            genres = [name for name in self.tvdbsoup.Genre.text.split("|") if name]

            # Find the number of seasons from the last SeasonNumber element in the feed
            season_tags = self.tvdbsoup.find_all("SeasonNumber")
            num_of_seasons = int(season_tags[-1].text) if season_tags else 0

            # If this is a new scrape and the number of seasons is greater than 10 disable
            # episode rating scraping to conserve app engine quota
            if not self.rescrape and num_of_seasons > 10:
                self.disable_episode_ratings = 1
                self.options_array[1] = 1

            imdb_id = self.tvdbsoup.IMDB_ID.text

            # Put the scraped information into the GAE datastore; put() returns the entity key
            self.series_key = TVShow(
                key_name=self.tvdb_id,
                title=self.tvdbsoup.SeriesName.text,
                desc=self.tvdbsoup.Overview.text,
                rating=float(self.get_imdb_rating(imdb_id)),
                fanart=fanart_url,
                genre=genres[0] if len(genres) > 0 else None,
                subgenre=genres[1] if len(genres) > 1 else None,
                status=self.tvdbsoup.Status.text,
                imdb_id=imdb_id,
                url_string=self.slug,
                last_scraped=datetime.utcfromtimestamp(0),
                num_seasons=num_of_seasons,
            ).put()
        else:
            # Get the stored show from the datastore (None when it has never been scraped)
            stored_show = TVShow.get_by_key_name(self.tvdb_id)
            self.series_key = stored_show
            if stored_show is not None:
                fanart_url = stored_show.fanart

        # If fanart exists generate a hexagon image and store it in the datastore
        if not self.disbable_fanart_scraping and fanart_url:
            hexagon_image = Hexagon(fanart_url).get_hex()

            # Only store the image when the hexagon was generated successfully
            if hexagon_image:
                HexImages(parent=self.series_key, key_name=self.tvdb_id, image=db.Blob(hexagon_image)).put()
Пример #2
0
def email_update(request):
    """
    Find all the users who are subscribed to shows airing this week and, for each such user,
    send an email listing those episodes grouped by air date.

    Users with no subscriptions, or with no subscribed episodes airing in the next 7 days,
    are skipped. Subscriptions that point at a show which is not in the datastore are ignored.

    :param request: The request object for the page
    :return: A HttpResponse Object, which renders a page specifying how many emails were sent
    """

    # Pin "today" once so that the query window and the per-day buckets always agree,
    # even if the run happens to span midnight
    today = date.today()

    # Calculate 7 days from now to use in the query
    weektoday = today + timedelta(days=7)

    # Get all the users
    user_query = db.GqlQuery("SELECT * FROM User")

    messages_sent = 0
    for user in user_query.run():
        # For each user get the shows they're subscribed to
        show_query = db.GqlQuery("SELECT * FROM UserShow WHERE user_id = :id", id=user.key().name())
        show_ids = [str(user_show.show_id) for user_show in show_query.run()]

        # Don't send email if they're not subscribed to any shows
        if not show_ids:
            continue

        # get_by_key_name returns None in place of every key name that has no entity;
        # drop those so a stale subscription cannot break the whole run
        shows = [show for show in TVShow.get_by_key_name(show_ids) if show is not None]

        # One bucket per day of the coming week, filled with the episodes airing that day
        episodes_this_week = {today + timedelta(days=offset): [] for offset in range(7)}
        for show in shows:
            episodes_query = db.GqlQuery(
                "SELECT * FROM TVEpisode WHERE airdate >= :today AND airdate < :weektoday AND ANCESTOR IS :showid ORDER BY airdate",
                today=today, weektoday=weektoday, showid=show)

            # Map the date for an episode to a dictionary containing the show title, the episode name
            # and the season and episode number
            for episode in episodes_query.run():
                episodes_this_week[episode.airdate].append({
                    'show_title': show.title,
                    'ep_name': episode.name,
                    'season_num': episode.season,
                    'ep_num': episode.ep_number,
                })

        # Don't send email if there are no episodes airing this week
        if not any(episodes_this_week.values()):
            continue

        # Construct a message containing the episodes for this week, one section per day
        message_parts = ["Hello Telehex Subscriber,\n\nHere are your shows airing this week:\n\n"]
        for air_day in sorted(episodes_this_week):
            day_episodes = episodes_this_week[air_day]
            if not day_episodes:
                continue
            message_parts.append("{0}:\n".format(air_day.strftime("%B %d, %Y")))
            for show_ep in day_episodes:
                message_parts.append(
                    "\t{0} - {1} (S{2:02d}E{3:02d})\n".format(show_ep['show_title'], show_ep['ep_name'],
                                                              show_ep['season_num'], show_ep['ep_num']))
            message_parts.append("\n\n")
        message = "".join(message_parts)

        # Get the server to send the mail
        mail.send_mail(sender="*****@*****.**",
                       to="{0}".format(user.email),
                       subject="Telehex - Your weekly episode email",
                       body=message)

        messages_sent += 1

    # Task complete, return a response with the number of messages sent
    return render(request, 'telehex/email_update.html', {"messages_sent": messages_sent})
Пример #3
0
    def __init__(self, tvdb_id, rescrape=False, options="00000000", update_options=True):
        """
        The :class:Scraper class constructor. Takes a tvdb_id and initialises the scraping for the TVShow
        and the TVEpisodes

        :param tvdb_id: The tvdb id of the show to be scraped.
        :param rescrape:
            False for the first scrape of a show, True when re-scraping a show. On a first scrape
            the ``options`` argument is ignored and the default options are used.
        :param options:
            The options param is used to specify options for the scraping and display of a show
            Each character index of the string represents a specific option. These are
                0. Disable Scraping:
                    0 = scraping enabled, 1 = scraping disabled
                1. Disable Episode Ratings Scraping:
                    0 = episode scraping enabled, 1 = episode scraping disabled
                2. Disable Fanart Scraping:
                    0 = fanart scraping enabled, 1 = fanart scraping disabled
                3. Disable TVShow Scraping
                    0 = TVShow scraping enabled, 1 = TVShow scraping disabled
                4. Disable TVEpisode Scraping
                    0 = TVEpisode scraping enabled, 1 = TVEpisode scraping disabled
                5. Disable Episode Description Display
                    0 = display episode desc, 1 = don't display episode desc
                6. Disable Episode Display
                    0 = display episodes, 1 = don't display episodes
                7. Reserved
        :param update_options:
            When True the (possibly adjusted) options are written back to the TVShow entity
            once the scrape has finished.
        """
        self.series_key = None
        self.rating = -1
        self.tvdb_id = tvdb_id

        # Determine if this is a new scrape or a rescrape
        self.rescrape = rescrape

        # If this is a first scrape then set options to default
        self.options = options if self.rescrape else "00000000"

        # Create the options array from the effective options (not the raw argument) so the
        # first-scrape default above is actually honoured. A real list is required because
        # the flags are indexed and may be reassigned while scraping.
        self.options_array = [int(flag) for flag in self.options]

        # Specify the options relevant to the scraping
        self.disable_scraping = self.options_array[0]
        self.disable_episode_ratings = self.options_array[1]
        self.disbable_fanart_scraping = self.options_array[2]
        self.disbable_tvshow_scraping = self.options_array[3]
        self.disbable_tvepisode_scraping = self.options_array[4]

        # Nothing else to do when scraping is disabled for this show
        if self.disable_scraping:
            return

        # Increase the timeout for fetching a url - required for large shows
        urlfetch.set_default_fetch_deadline(60)

        # Fetch the XML from tvdb and turn into a BeautifulSoup Object
        self.tvdbxml = urllib2.urlopen(
            "http://thetvdb.com/api/{0}/series/{1}/all/en.xml".format(API_KEY, self.tvdb_id)
        )
        try:
            self.tvdbsoup = BeautifulSoup(self.tvdbxml.read(), "xml")
        finally:
            # The body has been read in full (or reading failed); release the connection
            self.tvdbxml.close()

        # Generate the show slug for the show, e.g. Breaking Bad becomes breaking_bad
        exclude_chars = set(string.punctuation)
        self.slug = "".join(char for char in self.tvdbsoup.SeriesName.text if char not in exclude_chars)
        self.slug = re.sub(r"\W+", "_", self.slug.lower())

        # Perform the scraping for the TVShow
        self.get_series_info()

        if not self.disbable_tvepisode_scraping:
            # Perform the scraping for the TVEpisodes
            self.get_episode_info()

        # Specify when the show was last scraped
        show = TVShow.get_by_key_name(self.tvdb_id)
        show.last_scraped = datetime.now()
        if update_options:
            # Persist the flags as they stand after scraping, which may have adjusted them
            show.options = "".join(str(flag) for flag in self.options_array)
        show.put()