def update_tmdb_data_week(session: sqlalchemy.orm.Session, year: int, week: int) -> None:
    """
    Update the ShowData entries in a week with data from TMDB.

    :param session: the db session.
    :param year: the year.
    :param week: the week.
    """
    week_start = datetime.date.fromisocalendar(year, week, 1)
    week_end = datetime.date.fromisocalendar(year, week, 7)

    start_datetime = datetime.datetime.combine(week_start, datetime.time(0, 0, 0))
    end_datetime = datetime.datetime.combine(week_end, datetime.time(23, 59, 59))

    shows = db_calls.get_shows_interval(session, start_datetime, end_datetime)

    for s in shows:
        # Skip movies without a year or from before 2010
        if s.is_movie and (s.year is None or s.year < 2010):
            continue

        tmdb_show = tmdb_calls.get_show_using_id(session, s.tmdb_id, s.is_movie)

        if tmdb_show:
            s.tmdb_vote_count = tmdb_show.vote_count
            s.tmdb_vote_average = tmdb_show.vote_average
            s.tmdb_popularity = tmdb_show.popularity

        # Small delay between consecutive TMDB requests
        time.sleep(0.1)

    db_calls.commit(session)

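# A minimal usage sketch (not part of the original module): refresh the TMDB data
# for the current ISO week. The engine URL and session factory below are
# illustrative assumptions, not the project's actual configuration.
def _example_update_current_week() -> None:
    engine = sqlalchemy.create_engine('sqlite:///example.db')  # hypothetical URL
    session = sqlalchemy.orm.sessionmaker(bind=engine)()

    iso_year, iso_week, _ = datetime.date.today().isocalendar()

    try:
        update_tmdb_data_week(session, iso_year, iso_week)
    finally:
        session.close()
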
def clear_tokens_list(session):
    """Delete tokens that have expired, from the DB."""
    today = datetime.date.today()

    session.query(models.Token) \
        .filter(models.Token.expiration_date < today) \
        .delete()

    db_calls.commit(session)

def clear_unverified_users(session):
    """Delete users that remain unverified 30 days after registration, from the DB."""
    today = datetime.date.today()

    session.query(models.User) \
        .filter(models.User.verified.is_(False)) \
        .filter(today > models.User.registration_date + datetime.timedelta(days=30)) \
        .delete()

    db_calls.commit(session)

def clear_show_list(session):
    """Delete show sessions older than the configured number of days, from the DB."""
    # replace returns a new datetime, so the result needs to be assigned
    today_start = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    session.query(models.ShowSession) \
        .filter(models.ShowSession.date_time
                < today_start - datetime.timedelta(days=configuration.show_sessions_validity_days)) \
        .delete()

    db_calls.commit(session)

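# A minimal sketch of how the three cleanup functions above could be chained into
# a single maintenance task. The function name and the idea of a daily schedule
# are assumptions for illustration, not the project's actual entry point.
def run_daily_cleanup(session: sqlalchemy.orm.Session) -> None:
    """Run all DB cleanup tasks in sequence (illustrative only)."""
    clear_tokens_list(session)
    clear_unverified_users(session)
    clear_show_list(session)
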
def process_alarms(session: sqlalchemy.orm.Session):
    """
    Process the alarms that exist in the DB.

    :param session: the db session.
    """
    alarms = db_calls.get_alarms(session)

    for a in alarms:
        user = db_calls.get_user_id(session, a.user_id)
        search_adult = user.show_adult if user is not None else False

        if a.alarm_type == response_models.AlarmType.LISTINGS.value:
            titles = [a.show_name]
            db_shows = []
        else:
            titles = get_show_titles(session, a.trakt_id, a.is_movie)

            db_shows = search_sessions_db_with_tmdb_id(session, a.trakt_id, a.is_movie, only_new=True,
                                                       show_season=a.show_season, show_episode=a.show_episode,
                                                       use_excluded_channels=True, user_id=user.id)

        db_shows += search_sessions_db(session, titles, a.is_movie, complete_title=True, only_new=True,
                                       show_season=a.show_season, show_episode=a.show_episode,
                                       search_adult=search_adult, use_excluded_channels=True, user_id=user.id,
                                       ignore_with_tmdb_id=True)

        if len(db_shows) > 0:
            process_emails.set_language(user.language)
            process_emails.send_alarms_email(user.email, db_shows)

    # Update the datetime of the last processing of the alarms
    last_update = db_calls.get_last_update(session)
    last_update.alarms_datetime = datetime.datetime.utcnow()

    db_calls.commit(session)

def delete_old_sessions(db_session: sqlalchemy.orm.Session, start_datetime: datetime.datetime,
                        end_datetime: datetime.datetime, channels: List[str]) -> int:
    """
    Delete sessions that no longer exist.
    Send emails to the users whose reminders are associated with such sessions.

    :param db_session: the DB session.
    :param start_datetime: the start of the interval of interest.
    :param end_datetime: the end of the interval of interest.
    :param channels: the set of channels.
    :return: the number of deleted sessions.
    """
    nb_deleted_sessions = 0

    # Get the old show sessions
    old_sessions = db_calls.search_old_sessions(db_session, start_datetime, end_datetime, channels)

    for s in old_sessions:
        nb_deleted_sessions += 1

        # Get the reminders associated with this session
        reminders = db_calls.get_reminders_session(db_session, s.id)

        if len(reminders) != 0:
            # Get the session
            show_session = db_calls.get_show_session_complete(db_session, s.id)
            show_result = response_models.LocalShowResult.create_from_show_session(show_session[0], show_session[1],
                                                                                   show_session[2])

            # Warn all users with the reminders for this session
            for r in reminders:
                user = db_calls.get_user_id(db_session, r.user_id)
                process_emails.send_deleted_sessions_email(user.email, [show_result])

                # Delete the reminder
                db_session.delete(r)

            # Commit to ensure there are no more references to the session
            db_calls.commit(db_session)

        # Delete the session
        db_session.delete(s)

    db_session.commit()

    return nb_deleted_sessions

def process_excluded_channel_list(session: sqlalchemy.orm.Session, user_id: int,
                                  excluded_channel_list: List[int]) -> None:
    """
    Update the list of excluded channels of a user.

    :param session: the DB session.
    :param user_id: the id of the user.
    :param excluded_channel_list: the list of excluded channels.
    """
    # Get the current list of excluded channels and turn it into a set
    db_excluded_channel_list = db_calls.get_user_excluded_channels(session, user_id)

    current_excluded_channel_list = set()

    for excluded_channel in db_excluded_channel_list:
        current_excluded_channel_list.add(excluded_channel.channel_id)

    excluded_channel_list = set(excluded_channel_list)

    # There's nothing to do if the current set and the new one are the same
    if current_excluded_channel_list == excluded_channel_list:
        return

    # Otherwise, delete all previous entries
    for old_excluded_channel in db_excluded_channel_list:
        session.delete(old_excluded_channel)

    # And add all new entries
    for excluded_channel in excluded_channel_list:
        db_calls.register_user_excluded_channel(session, user_id, excluded_channel, should_commit=False)

    db_calls.commit(session)

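# Illustrative usage sketch: replace a user's excluded channels in one call.
# The ids below are made-up values.
#
#     process_excluded_channel_list(session, user_id=42, excluded_channel_list=[1, 7, 13])
#
# Design note: the function rewrites the whole list instead of computing a delta;
# with the handful of channels a user typically excludes, the full rewrite is
# cheap and avoids partial-update edge cases.
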
def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
        -> Optional[get_file_data.InsertionResult]:
    """
    Add the data, in the file, to the DB.

    :param db_session: the DB session.
    :param filename: the path to the file.
    :param channel_name: the name of the channel (ignored in this case, as the channel is read from each row).
    :return: the InsertionResult.
    """
    wb = openpyxl.load_workbook(filename)

    insertion_result = get_file_data.InsertionResult()

    first_event_datetime = None
    date_time = None

    today_00_00 = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    # Skip row 1, with the headers
    for row in wb.active.iter_rows(min_row=2, max_col=15):
        # Skip rows that contain only the date
        if row[0].value is None:
            continue

        # Skip the rows in which the year is not a number (header rows)
        if not isinstance(row[4].value, int):
            continue

        # Get the data
        channel_name = row[0].value
        date = row[1].value
        time = row[2].value
        original_title = str(row[3].value)
        year = int(row[4].value)
        age_classification = row[5].value
        genre = row[6].value
        duration = int(row[7].value)
        languages = row[8].value
        countries = row[9].value
        synopsis = row[10].value
        directors = row[11].value
        cast = row[12].value
        localized_title = str(row[13].value)
        # episode_title = row[14].value

        # If the show is not yet defined
        if 'Programa a Designar' in original_title:
            continue

        # Combine the date with the time
        date_time = date.replace(hour=time.hour, minute=time.minute)

        # Add the Lisbon timezone info, then convert it to UTC
        # and then remove the timezone info
        date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
            .replace(tzinfo=None)

        # Ignore old sessions
        if date_time < (today_00_00 - datetime.timedelta(days=configuration.show_sessions_validity_days)):
            continue

        # Get the first event's datetime
        if first_event_datetime is None:
            first_event_datetime = date_time

        # Check if it matches the regex of a series
        series = re.search('(.+) T([0-9]+),[ ]+([0-9]+)', localized_title.strip())

        # If it is a series, extract its season and episode
        if series:
            localized_title = series.group(1)
            is_movie = False
            season = int(series.group(2))
            episode = int(series.group(3))

            # episode_synopsis = synopsis
            synopsis = None

            # Also get the original title without the season and episode
            series = re.search('(.+) T([0-9]+),[ ]+([0-9]+)', original_title.strip())

            if series:
                original_title = series.group(1)
        else:
            season = None
            episode = None
            is_movie = True

        # Process the titles
        localized_title, vp, extended_cut = TVCine.process_title(localized_title)
        audio_language = 'pt' if vp else None

        original_title, _, _ = TVCine.process_title(original_title)

        # Sometimes the cast is switched with the directors
        if cast is not None and directors is not None:
            cast_commas = auxiliary.search_chars(cast, [','])[0]
            director_commas = auxiliary.search_chars(directors, [','])[0]

            # When that happens, switch them
            if len(cast_commas) < len(director_commas):
                cast, directors = directors, cast

        # Process the directors
        if directors is not None:
            directors = directors.split(',')

        # Genre is movie, series, documentary, news...
        if genre is None or 'Document' not in genre:
            subgenre = genre  # Subgenre is in Portuguese
            genre = 'Movie' if is_movie else 'Series'
        else:
            genre = 'Documentary'
            subgenre = None

        channel_name = 'TVCine ' + channel_name.strip().split()[1]
        channel_id = db_calls.get_channel_name(db_session, channel_name).id

        # Process file entry
        insertion_result = get_file_data.process_file_entry(db_session, insertion_result, original_title,
                                                            localized_title, is_movie, genre, date_time, channel_id,
                                                            year, directors, subgenre, synopsis, season, episode,
                                                            cast=cast, duration=duration, countries=countries,
                                                            age_classification=age_classification,
                                                            audio_languages=languages,
                                                            session_audio_language=audio_language,
                                                            extended_cut=extended_cut)

        if insertion_result is None:
            return None

    if insertion_result.total_nb_sessions_in_file != 0:
        db_calls.commit(db_session)

        # Delete old sessions for the same time period
        file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
        file_end_datetime = date_time + datetime.timedelta(minutes=5)

        nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime, file_end_datetime,
                                                                TVCine.channels)

        # Set the remaining information
        insertion_result.nb_deleted_sessions = nb_deleted_sessions
        insertion_result.start_datetime = file_start_datetime
        insertion_result.end_datetime = file_end_datetime

        return insertion_result
    else:
        return None

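# A quick sketch of the series-title pattern used above: a title like
# 'Name Tn, e' encodes the season after 'T' and the episode after the comma.
# The sample title is made up.
def _example_tvcine_series_regex() -> None:
    match = re.search('(.+) T([0-9]+),[ ]+([0-9]+)', 'Westworld T2, 5')

    assert match is not None
    assert match.group(1) == 'Westworld'  # title
    assert int(match.group(2)) == 2       # season
    assert int(match.group(3)) == 5       # episode
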
def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
        -> Optional[get_file_data.InsertionResult]:
    """
    Add the data, in the file, to the DB.

    :param db_session: the DB session.
    :param filename: the path to the file.
    :param channel_name: the name of the channel.
    :return: the InsertionResult.
    """
    wb = openpyxl.load_workbook(filename)

    first_event_datetime = None
    date_time = None

    insertion_result = get_file_data.InsertionResult()

    today_00_00 = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    # Skip row 1, with the headers
    for row in wb.active.iter_rows(min_row=3, max_col=12):
        # Skip rows that do not contain a date
        if row[0].value is None:
            continue

        # Get the data
        date = datetime.datetime.strptime(str(row[0].value), '%Y%m%d')
        time = row[1].value
        original_title = str(row[2].value)
        localized_title = str(row[3].value)
        synopsis = row[4].value
        year = int(row[5].value)
        age_classification = row[6].value
        directors = row[7].value
        cast = row[8].value
        subgenre = row[9].value  # Obtained in Portuguese

        # Combine the date with the time
        date_time = date.replace(hour=time.hour, minute=time.minute)

        # Add the Lisbon timezone info, then convert it to UTC
        # and then remove the timezone info
        date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
            .replace(tzinfo=None)

        # Ignore old sessions
        if date_time < (today_00_00 - datetime.timedelta(days=configuration.show_sessions_validity_days)):
            continue

        # Get the first event's datetime
        if first_event_datetime is None:
            first_event_datetime = date_time

        # Process the titles
        localized_title, vp, _ = Cinemundo.process_title(localized_title)
        audio_language = 'pt' if vp else None

        original_title, _, season = Cinemundo.process_title(original_title)

        if season is not None:
            is_movie = False
            genre = 'Series'
        else:
            is_movie = True
            genre = 'Movie'

        # Process the directors
        if directors is not None:
            directors = re.split(',| e ', directors)

        # Get the channel's id
        channel_id = db_calls.get_channel_name(db_session, 'Cinemundo').id

        # Process an entry
        insertion_result = get_file_data.process_file_entry(db_session, insertion_result, original_title,
                                                            localized_title, is_movie, genre, date_time, channel_id,
                                                            year, directors, subgenre, synopsis, season, None,
                                                            cast=cast, age_classification=age_classification,
                                                            audio_languages=audio_language)

        if insertion_result is None:
            return None

    if insertion_result.total_nb_sessions_in_file != 0:
        db_calls.commit(db_session)

        # Delete old sessions for the same time period
        file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
        file_end_datetime = date_time + datetime.timedelta(minutes=5)

        nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime, file_end_datetime,
                                                                Cinemundo.channels)

        # Set the remaining information
        insertion_result.nb_deleted_sessions = nb_deleted_sessions
        insertion_result.start_datetime = file_start_datetime
        insertion_result.end_datetime = file_end_datetime

        return insertion_result
    else:
        return None

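# All of the add_file_data variants normalize local times the same way. A rough
# equivalent of the auxiliary helpers, assuming they attach the Europe/Lisbon
# offset (requires Python 3.9+ for zoneinfo); a sketch, not the project's code.
def _example_lisbon_to_naive_utc(local_dt: datetime.datetime) -> datetime.datetime:
    from zoneinfo import ZoneInfo

    # Attach the Lisbon offset, convert to UTC, then drop the timezone info
    aware = local_dt.replace(tzinfo=ZoneInfo('Europe/Lisbon'))
    return aware.astimezone(datetime.timezone.utc).replace(tzinfo=None)
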
def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
        -> Optional[get_file_data.InsertionResult]:
    """
    Add the data, in the file, to the DB.

    :param db_session: the DB session.
    :param filename: the path to the file.
    :param channel_name: the name of the channel.
    :return: the InsertionResult.
    """
    dom_tree = xml.dom.minidom.parse(filename)
    collection = dom_tree.documentElement

    # Get all events
    events = collection.getElementsByTagName('Event')

    # If there are no events
    if len(events) == 0:
        return None

    first_event_datetime = None
    date_time = None

    insertion_result = get_file_data.InsertionResult()

    today_00_00 = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    # Process each event
    for event in events:
        # --- START DATA GATHERING ---
        # Get the date and time
        begin_time = event.getAttribute('beginTime')
        date_time = datetime.datetime.strptime(begin_time, '%Y%m%d%H%M%S')

        # Add the Lisbon timezone info, then convert it to UTC
        # and then remove the timezone info
        date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
            .replace(tzinfo=None)

        # Ignore old sessions
        if date_time < (today_00_00 - datetime.timedelta(days=configuration.show_sessions_validity_days)):
            continue

        # Get the first event's datetime
        if first_event_datetime is None:
            first_event_datetime = date_time

        # Get the event's duration in minutes
        duration = int(int(event.getAttribute('duration')) / 60)

        # Inside the Event -> EpgProduction
        epg_production = event.getElementsByTagName('EpgProduction')[0]

        # Get the genre
        genre_list = epg_production.getElementsByTagName('Genere')

        # Check if it is the genre that we are assuming it always is
        if len(genre_list) > 0 and 'Document' not in genre_list[0].firstChild.nodeValue:
            get_file_data.print_message('not a documentary', True, str(event.getAttribute('beginTime')))

        # Subgenre is in Portuguese
        subgenre = epg_production.getElementsByTagName('Subgenere')[0].firstChild.nodeValue

        # Age classification
        age_classification = epg_production.getElementsByTagName('ParentalRating')[0].firstChild.nodeValue

        # Inside the Event -> EpgProduction -> EpgText
        epg_text = epg_production.getElementsByTagName('EpgText')[0]

        # Get the localized title, in this case the Portuguese one
        localized_title = epg_text.getElementsByTagName('Name')[0].firstChild.nodeValue

        # Get the localized synopsis, in this case the Portuguese one
        short_description = epg_text.getElementsByTagName('ShortDescription')

        if short_description is not None and short_description[0].firstChild is not None:
            synopsis = short_description[0].firstChild.nodeValue
        else:
            synopsis = None

        # Iterate over the ExtendedInfo elements
        extended_info_elements = epg_text.getElementsByTagName('ExtendedInfo')

        original_title = None
        directors = None
        season = None
        episode = None
        year = None
        countries = None
        cast = None

        for extended_info in extended_info_elements:
            attribute = extended_info.getAttribute('name')

            if attribute == 'OriginalEventName' and extended_info.firstChild is not None:
                original_title = extended_info.firstChild.nodeValue
            elif attribute == 'Year' and extended_info.firstChild is not None:
                year = int(extended_info.firstChild.nodeValue)

                # Sometimes the year is 0
                if year == 0:
                    year = None
            elif attribute == 'Director' and extended_info.firstChild is not None:
                directors = extended_info.firstChild.nodeValue
            elif attribute == 'Casting' and extended_info.firstChild is not None:
                cast = extended_info.firstChild.nodeValue
            elif attribute == 'Nationality' and extended_info.firstChild is not None:
                countries = extended_info.firstChild.nodeValue
            elif attribute == 'Cycle' and extended_info.firstChild is not None:
                season = int(extended_info.firstChild.nodeValue)
            elif attribute == 'EpisodeNumber' and extended_info.firstChild is not None:
                episode = int(extended_info.firstChild.nodeValue)

        # Get the channel's id
        channel_id = db_calls.get_channel_name(db_session, 'Odisseia').id

        # Process titles
        original_title = Odisseia.process_title(original_title)
        localized_title = Odisseia.process_title(localized_title)

        # Process the directors
        if directors is not None:
            directors = directors.split(',')

        is_movie = season is None
        genre = 'Documentary'

        # --- END DATA GATHERING ---

        # Process file entry
        insertion_result = get_file_data.process_file_entry(db_session, insertion_result, original_title,
                                                            localized_title, is_movie, genre, date_time, channel_id,
                                                            year, directors, subgenre, synopsis, season, episode,
                                                            cast=cast, duration=duration, countries=countries,
                                                            age_classification=age_classification)

        if insertion_result is None:
            return None

    # If there were only invalid sessions
    if first_event_datetime is None:
        return None

    db_calls.commit(db_session)

    # Delete old sessions for the same time period
    file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
    file_end_datetime = date_time + datetime.timedelta(minutes=5)

    nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime, file_end_datetime,
                                                            Odisseia.channels)

    # Set the remaining information
    insertion_result.nb_deleted_sessions = nb_deleted_sessions
    insertion_result.start_datetime = file_start_datetime
    insertion_result.end_datetime = file_end_datetime

    return insertion_result

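# The shape of the Odisseia XML implied by the parsing above (a reconstruction
# from the tag names used in the code, not an official schema):
#
#   <Event beginTime="20240101213000" duration="3600">
#     <EpgProduction>
#       <Genere>Documentario</Genere>
#       <Subgenere>Natureza</Subgenere>
#       <ParentalRating>12</ParentalRating>
#       <EpgText>
#         <Name>Titulo local</Name>
#         <ShortDescription>Sinopse</ShortDescription>
#         <ExtendedInfo name="OriginalEventName">Original Title</ExtendedInfo>
#         <ExtendedInfo name="Year">2023</ExtendedInfo>
#         <ExtendedInfo name="Cycle">1</ExtendedInfo>
#         <ExtendedInfo name="EpisodeNumber">3</ExtendedInfo>
#       </EpgText>
#     </EpgProduction>
#   </Event>
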
def add_file_data(db_session: sqlalchemy.orm.Session, filename: str, channel_name: str) \
        -> Optional[get_file_data.InsertionResult]:
    """
    Add the data, in the file, to the DB.

    :param db_session: the DB session.
    :param filename: the path to the file.
    :param channel_name: the name of the channel.
    :return: the InsertionResult.
    """
    # Get the position and format of the fields for this channel
    fields = GenericXlsx.process_configuration(channel_name)
    channel_name = GenericXlsx.channels_file[channel_name][0]

    # If it is invalid
    if fields is None:
        return None

    if '_file_format' in fields:
        file_format = fields['_file_format'].field_format
    else:
        file_format = '.xlsx'

    if file_format == '.xls':
        book = xlrd.open_workbook(filename)
        sheet = book.sheets()[0]
        rows = sheet.nrows
    else:
        book = openpyxl.load_workbook(filename)
        sheet = book.active
        rows = sheet.max_row

    insertion_result = get_file_data.InsertionResult()

    first_event_datetime = None
    date_time = None

    today_00_00 = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    row_skipped = False

    for rx in range(rows):
        if file_format == '.xls':
            row = sheet.row(rx)
        else:
            row = sheet[rx + 1]

        # Skip row 1, with the headers
        if not row_skipped:
            row_skipped = True
            continue

        # Skip the rows in which the year is not a number (header rows)
        if row[fields['year'].position].value is None:
            continue

        try:
            year = int(row[fields['year'].position].value)
        except ValueError:
            continue

        # Get the date_time
        if 'date_time' in fields:
            date_time = datetime.datetime.strptime(row[fields['date_time'].position].value,
                                                   fields['date_time'].field_format)
        else:
            if file_format == '.xls':
                date = xlrd.xldate_as_datetime(row[fields['date'].position].value, book.datemode)

                try:
                    # If the time comes in the time format
                    time = xlrd.xldate_as_datetime(row[fields['time'].position].value, book.datemode)
                except TypeError:
                    # If the time comes as text
                    time = datetime.datetime.strptime(row[fields['time'].position].value,
                                                      fields['time'].field_format)
            else:
                try:
                    date = datetime.datetime.strptime(row[fields['date'].position].value,
                                                      fields['date'].field_format)
                    time = datetime.datetime.strptime(row[fields['time'].position].value,
                                                      fields['time'].field_format)
                except TypeError:
                    continue

            # Combine the date with the time
            date_time = date.replace(hour=time.hour, minute=time.minute)

        # Add the Lisbon timezone info, then convert it to UTC
        # and then remove the timezone info
        date_time = auxiliary.convert_datetime_to_utc(auxiliary.get_datetime_with_tz_offset(date_time)) \
            .replace(tzinfo=None)

        # Ignore old sessions
        if date_time < (today_00_00 - datetime.timedelta(days=configuration.show_sessions_validity_days)):
            continue

        original_title = str(row[fields['original_title'].position].value)
        localized_title = str(row[fields['localized_title'].position].value)

        # If it is a placeholder show or temporary program
        if '_temporary_program' in fields:
            if str(fields['_temporary_program'].field_format) in original_title:
                continue

        if 'localized_synopsis' in fields:
            synopsis = str(row[fields['localized_synopsis'].position].value).strip()
        else:
            synopsis = None

        if 'cast' in fields:
            cast = row[fields['cast'].position].value.strip()

            if len(cast) == 0:
                cast = None
        else:
            cast = None

        if 'directors' in fields:
            directors = row[fields['directors'].position].value

            # Process the directors
            if directors is not None:
                if re.match('^ *$', directors):
                    directors = None
                else:
                    directors = directors.split(',')

            # If the name of the directors is actually a placeholder
            if '_ignore_directors' in fields and directors:
                if directors[0].strip() == fields['_ignore_directors'].field_format:
                    directors = None
        else:
            directors = None

        if 'creators' in fields:
            creators = row[fields['creators'].position].value

            # Process the creators
            if creators is not None:
                if re.match('^ *$', creators):
                    creators = None
                else:
                    creators = creators.split(',')
        else:
            creators = None

        if 'countries' in fields:
            countries = row[fields['countries'].position].value.strip()
        else:
            countries = None

        # Duration
        if 'duration' in fields:
            if fields['duration'].field_format == 'seconds':
                duration = int(int(row[fields['duration'].position].value) / 60)
            else:
                if file_format == '.xls':
                    duration = xlrd.xldate_as_datetime(row[fields['duration'].position].value, book.datemode)
                else:
                    duration = datetime.datetime.strptime(row[fields['duration'].position].value,
                                                          fields['duration'].field_format)

                duration = duration.hour * 60 + duration.minute
        else:
            duration = None

        if 'age_classification' in fields:
            age_classification = str(row[fields['age_classification'].position].value).strip()
        else:
            age_classification = None

        if 'subgenre' in fields:
            subgenre = row[fields['subgenre'].position].value.strip()
        else:
            subgenre = None

        # Get the first event's datetime
        if first_event_datetime is None:
            first_event_datetime = date_time

        if 'season' not in fields or 'episode' not in fields:
            season = None
            episode = None
        else:
            try:
                season = int(row[fields['season'].position].value)
            except ValueError:
                try:
                    # There are entries with a season 2.5, which will be converted to 2
                    season = int(float(row[fields['season'].position].value))
                except ValueError:
                    season = None

            # Some files use 0 as a placeholder
            if season == 0:
                season = None

            episode = None

            if fields['episode'].field_format == 'int':
                episode = int(row[fields['episode'].position].value)
            elif 'title_with_Ep.' in fields['episode'].field_format:
                series = re.search(r'Ep\. [0-9]+', row[fields['episode'].position].value.strip())

                if series is not None:
                    episode = int(series.group(0)[4:])

        # Determine whether or not it is a movie
        is_movie = season is None or episode is None

        # Make sure the season and episode are None for movies
        if is_movie:
            season = None
            episode = None

        # Genre is movie, series, documentary, news...
        genre = 'Movie' if is_movie else 'Series'

        # Process the titles
        original_title = GenericXlsx.process_title(original_title, fields['original_title'].field_format, is_movie)
        localized_title = GenericXlsx.process_title(localized_title, fields['localized_title'].field_format, is_movie)

        channel_id = db_calls.get_channel_name(db_session, channel_name).id

        # Process file entry
        insertion_result = get_file_data.process_file_entry(db_session, insertion_result, original_title,
                                                            localized_title, is_movie, genre, date_time, channel_id,
                                                            year, directors, subgenre, synopsis, season, episode,
                                                            cast=cast, duration=duration, countries=countries,
                                                            age_classification=age_classification, creators=creators)

        if insertion_result is None:
            return None

    if insertion_result.total_nb_sessions_in_file != 0:
        db_calls.commit(db_session)

        # Delete old sessions for the same time period
        file_start_datetime = first_event_datetime - datetime.timedelta(minutes=5)
        file_end_datetime = date_time + datetime.timedelta(minutes=5)

        nb_deleted_sessions = get_file_data.delete_old_sessions(db_session, file_start_datetime, file_end_datetime,
                                                                [channel_name])

        # Set the remaining information
        insertion_result.nb_deleted_sessions = nb_deleted_sessions
        insertion_result.start_datetime = file_start_datetime
        insertion_result.end_datetime = file_end_datetime

        return insertion_result
    else:
        return None

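# A sketch of the per-channel `fields` mapping that process_configuration is
# assumed to return: each name maps to a column position and an optional
# field_format. The Field class and the values below are illustrative guesses,
# not the project's actual configuration.
#
#   fields = {
#       'date': Field(position=0, field_format='%d/%m/%Y'),
#       'time': Field(position=1, field_format='%H:%M'),
#       'original_title': Field(position=2, field_format=''),
#       'localized_title': Field(position=3, field_format=''),
#       'year': Field(position=4, field_format=None),
#       'season': Field(position=5, field_format=None),
#       'episode': Field(position=6, field_format='int'),
#       '_file_format': Field(position=-1, field_format='.xls'),
#   }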