from apscheduler.schedulers.tornado import TornadoScheduler


class TimerManager:
    def __init__(self):
        self.m_scheduler = TornadoScheduler()
        self.start()

    def start(self):
        return self.m_scheduler.start()

    def stop(self):
        return self.m_scheduler.shutdown()

    def getTimer(self, id):
        return self.m_scheduler.get_job(str(id))

    def addLoopTimer(self, tid, tick, time, arg):
        try:
            job = self.getTimer(tid)
            if job is None:
                # fire `tick` once every `time` seconds
                self.m_scheduler.add_job(tick, 'interval', seconds=int(time),
                                         args=arg, id=str(tid),
                                         replace_existing=True)
        except Exception as e:
            print(str(e))
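# A minimal usage sketch for TimerManager above; the on_tick callback, the
# timer id 1, and the 3-second interval are illustrative assumptions, not part
# of the original class.
from tornado.ioloop import IOLoop


def on_tick(name):
    # called by APScheduler on the Tornado IOLoop every interval
    print("tick from timer %s" % name)


if __name__ == "__main__":
    manager = TimerManager()                       # starts the TornadoScheduler
    manager.addLoopTimer(1, on_tick, 3, ["demo"])  # on_tick("demo") every 3 seconds
    try:
        IOLoop.current().start()                   # jobs only fire while the loop runs
    except KeyboardInterrupt:
        manager.stop()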
class Core(object): def __init__(self): self.started = False # process id self.PID = os.getpid() # generate notifiers dict self.notifiersDict = AttrDict( libnotify=LibnotifyNotifier(), kodi_notifier=KODINotifier(), plex_notifier=PLEXNotifier(), emby_notifier=EMBYNotifier(), nmj_notifier=NMJNotifier(), nmjv2_notifier=NMJv2Notifier(), synoindex_notifier=synoIndexNotifier(), synology_notifier=synologyNotifier(), pytivo_notifier=pyTivoNotifier(), growl_notifier=GrowlNotifier(), prowl_notifier=ProwlNotifier(), libnotify_notifier=LibnotifyNotifier(), pushover_notifier=PushoverNotifier(), boxcar_notifier=BoxcarNotifier(), boxcar2_notifier=Boxcar2Notifier(), nma_notifier=NMA_Notifier(), pushalot_notifier=PushalotNotifier(), pushbullet_notifier=PushbulletNotifier(), freemobile_notifier=FreeMobileNotifier(), twitter_notifier=TwitterNotifier(), trakt_notifier=TraktNotifier(), email_notifier=EmailNotifier() ) # generate metadata providers dict self.metadataProviderDict = get_metadata_generator_dict() # generate providers dict self.providersDict = providersDict() # init notification queue self.srNotifications = Notifications() # init logger self.srLogger = srLogger() # init config self.srConfig = srConfig() # init scheduler service self.srScheduler = TornadoScheduler() # init web server self.srWebServer = srWebServer() # init web client session self.srWebSession = srSession() # google api self.googleAuth = googleAuth() # name cache self.NAMECACHE = srNameCache() # queues self.SHOWQUEUE = srShowQueue() self.SEARCHQUEUE = srSearchQueue() # updaters self.VERSIONUPDATER = srVersionUpdater() self.SHOWUPDATER = srShowUpdater() # searchers self.DAILYSEARCHER = srDailySearcher() self.BACKLOGSEARCHER = srBacklogSearcher() self.PROPERSEARCHER = srProperSearcher() self.TRAKTSEARCHER = srTraktSearcher() self.SUBTITLESEARCHER = srSubtitleSearcher() # auto postprocessor self.AUTOPOSTPROCESSOR = srPostProcessor() # sickrage version self.NEWEST_VERSION = None self.NEWEST_VERSION_STRING = None # anidb connection self.ADBA_CONNECTION = None # show list self.SHOWLIST = [] def start(self): self.started = True # thread name threading.currentThread().setName('CORE') # Check if we need to perform a restore first if os.path.exists(os.path.abspath(os.path.join(sickrage.DATA_DIR, 'restore'))): success = restoreSR(os.path.abspath(os.path.join(sickrage.DATA_DIR, 'restore')), sickrage.DATA_DIR) print("Restoring SiCKRAGE backup: %s!\n" % ("FAILED", "SUCCESSFUL")[success]) if success: shutil.rmtree(os.path.abspath(os.path.join(sickrage.DATA_DIR, 'restore')), ignore_errors=True) # migrate old database file names to new ones if os.path.isfile(os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sickbeard.db'))): if os.path.isfile(os.path.join(sickrage.DATA_DIR, 'sickrage.db')): helpers.moveFile(os.path.join(sickrage.DATA_DIR, 'sickrage.db'), os.path.join(sickrage.DATA_DIR, '{}.bak-{}' .format('sickrage.db', datetime.datetime.now().strftime( '%Y%m%d_%H%M%S')))) helpers.moveFile(os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sickbeard.db')), os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sickrage.db'))) # load config self.srConfig.load() # set socket timeout socket.setdefaulttimeout(self.srConfig.SOCKET_TIMEOUT) # setup logger settings self.srLogger.logSize = self.srConfig.LOG_SIZE self.srLogger.logNr = self.srConfig.LOG_NR self.srLogger.debugLogging = sickrage.DEBUG self.srLogger.consoleLogging = not sickrage.QUITE self.srLogger.logFile = self.srConfig.LOG_FILE # start logger self.srLogger.start() # initialize the main SB database 
main_db.MainDB().InitialSchema().upgrade() # initialize the cache database cache_db.CacheDB().InitialSchema().upgrade() # initialize the failed downloads database failed_db.FailedDB().InitialSchema().upgrade() # fix up any db problems main_db.MainDB().SanityCheck() # load data for shows from database self.load_shows() if self.srConfig.DEFAULT_PAGE not in ('home', 'schedule', 'history', 'news', 'IRC'): self.srConfig.DEFAULT_PAGE = 'home' # cleanup cache folder for dir in ['mako', 'sessions', 'indexers']: try: shutil.rmtree(os.path.join(self.srConfig.CACHE_DIR, dir), ignore_errors=True) except Exception: continue # init anidb connection if not self.srConfig.USE_ANIDB: try: self.ADBA_CONNECTION = adba.Connection(keepAlive=True, log=lambda msg: self.srLogger.debug( "AniDB: %s " % msg)).auth(self.srConfig.ANIDB_USERNAME, self.srConfig.ANIDB_PASSWORD) except Exception as e: self.srLogger.warning("AniDB exception msg: %r " % repr(e)) if self.srConfig.WEB_PORT < 21 or self.srConfig.WEB_PORT > 65535: self.srConfig.WEB_PORT = 8081 if not self.srConfig.WEB_COOKIE_SECRET: self.srConfig.WEB_COOKIE_SECRET = generateCookieSecret() # attempt to help prevent users from breaking links by using a bad url if not self.srConfig.ANON_REDIRECT.endswith('?'): self.srConfig.ANON_REDIRECT = '' if not re.match(r'\d+\|[^|]+(?:\|[^|]+)*', self.srConfig.ROOT_DIRS): self.srConfig.ROOT_DIRS = '' self.srConfig.NAMING_FORCE_FOLDERS = check_force_season_folders() if self.srConfig.NZB_METHOD not in ('blackhole', 'sabnzbd', 'nzbget'): self.srConfig.NZB_METHOD = 'blackhole' if self.srConfig.TORRENT_METHOD not in ('blackhole', 'utorrent', 'transmission', 'deluge', 'deluged', 'download_station', 'rtorrent', 'qbittorrent', 'mlnet', 'putio'): self.srConfig.TORRENT_METHOD = 'blackhole' if self.srConfig.PROPER_SEARCHER_INTERVAL not in ('15m', '45m', '90m', '4h', 'daily'): self.srConfig.PROPER_SEARCHER_INTERVAL = 'daily' if self.srConfig.AUTOPOSTPROCESSOR_FREQ < self.srConfig.MIN_AUTOPOSTPROCESSOR_FREQ: self.srConfig.AUTOPOSTPROCESSOR_FREQ = self.srConfig.MIN_AUTOPOSTPROCESSOR_FREQ if self.srConfig.NAMECACHE_FREQ < self.srConfig.MIN_NAMECACHE_FREQ: self.srConfig.NAMECACHE_FREQ = self.srConfig.MIN_NAMECACHE_FREQ if self.srConfig.DAILY_SEARCHER_FREQ < self.srConfig.MIN_DAILY_SEARCHER_FREQ: self.srConfig.DAILY_SEARCHER_FREQ = self.srConfig.MIN_DAILY_SEARCHER_FREQ self.srConfig.MIN_BACKLOG_SEARCHER_FREQ = get_backlog_cycle_time() if self.srConfig.BACKLOG_SEARCHER_FREQ < self.srConfig.MIN_BACKLOG_SEARCHER_FREQ: self.srConfig.BACKLOG_SEARCHER_FREQ = self.srConfig.MIN_BACKLOG_SEARCHER_FREQ if self.srConfig.VERSION_UPDATER_FREQ < self.srConfig.MIN_VERSION_UPDATER_FREQ: self.srConfig.VERSION_UPDATER_FREQ = self.srConfig.MIN_VERSION_UPDATER_FREQ if self.srConfig.SHOWUPDATE_HOUR > 23: self.srConfig.SHOWUPDATE_HOUR = 0 elif self.srConfig.SHOWUPDATE_HOUR < 0: self.srConfig.SHOWUPDATE_HOUR = 0 if self.srConfig.SUBTITLE_SEARCHER_FREQ < self.srConfig.MIN_SUBTITLE_SEARCHER_FREQ: self.srConfig.SUBTITLE_SEARCHER_FREQ = self.srConfig.MIN_SUBTITLE_SEARCHER_FREQ self.srConfig.NEWS_LATEST = self.srConfig.NEWS_LAST_READ if self.srConfig.SUBTITLES_LANGUAGES[0] == '': self.srConfig.SUBTITLES_LANGUAGES = [] # initialize metadata_providers for cur_metadata_tuple in [(self.srConfig.METADATA_KODI, kodi), (self.srConfig.METADATA_KODI_12PLUS, kodi_12plus), (self.srConfig.METADATA_MEDIABROWSER, mediabrowser), (self.srConfig.METADATA_PS3, ps3), (self.srConfig.METADATA_WDTV, wdtv), (self.srConfig.METADATA_TIVO, tivo), (self.srConfig.METADATA_MEDE8ER, mede8er)]: 
(cur_metadata_config, cur_metadata_class) = cur_metadata_tuple tmp_provider = cur_metadata_class.metadata_class() tmp_provider.set_config(cur_metadata_config) self.metadataProviderDict[tmp_provider.name] = tmp_provider # add show queue job self.srScheduler.add_job( self.SHOWQUEUE.run, srIntervalTrigger(**{'seconds': 5}), name="SHOWQUEUE", id="SHOWQUEUE" ) # add search queue job self.srScheduler.add_job( self.SEARCHQUEUE.run, srIntervalTrigger(**{'seconds': 5}), name="SEARCHQUEUE", id="SEARCHQUEUE" ) # add version checker job self.srScheduler.add_job( self.VERSIONUPDATER.run, srIntervalTrigger( **{'hours': self.srConfig.VERSION_UPDATER_FREQ, 'min': self.srConfig.MIN_VERSION_UPDATER_FREQ}), name="VERSIONUPDATER", id="VERSIONUPDATER" ) # add network timezones updater job self.srScheduler.add_job( update_network_dict, srIntervalTrigger(**{'days': 1}), name="TZUPDATER", id="TZUPDATER" ) # add namecache updater job self.srScheduler.add_job( self.NAMECACHE.run, srIntervalTrigger( **{'minutes': self.srConfig.NAMECACHE_FREQ, 'min': self.srConfig.MIN_NAMECACHE_FREQ}), name="NAMECACHE", id="NAMECACHE" ) # add show updater job self.srScheduler.add_job( self.SHOWUPDATER.run, srIntervalTrigger( **{'hours': 1, 'start_date': datetime.datetime.now().replace(hour=self.srConfig.SHOWUPDATE_HOUR)}), name="SHOWUPDATER", id="SHOWUPDATER" ) # add daily search job self.srScheduler.add_job( self.DAILYSEARCHER.run, srIntervalTrigger( **{'minutes': self.srConfig.DAILY_SEARCHER_FREQ, 'min': self.srConfig.MIN_DAILY_SEARCHER_FREQ}), name="DAILYSEARCHER", id="DAILYSEARCHER" ) # add backlog search job self.srScheduler.add_job( self.BACKLOGSEARCHER.run, srIntervalTrigger( **{'minutes': self.srConfig.BACKLOG_SEARCHER_FREQ, 'min': self.srConfig.MIN_BACKLOG_SEARCHER_FREQ}), name="BACKLOG", id="BACKLOG" ) # add auto-postprocessing job self.srScheduler.add_job( self.AUTOPOSTPROCESSOR.run, srIntervalTrigger(**{'minutes': self.srConfig.AUTOPOSTPROCESSOR_FREQ, 'min': self.srConfig.MIN_AUTOPOSTPROCESSOR_FREQ}), name="POSTPROCESSOR", id="POSTPROCESSOR" ) # add find proper job self.srScheduler.add_job( self.PROPERSEARCHER.run, srIntervalTrigger(**{ 'minutes': {'15m': 15, '45m': 45, '90m': 90, '4h': 4 * 60, 'daily': 24 * 60}[ self.srConfig.PROPER_SEARCHER_INTERVAL]}), name="PROPERSEARCHER", id="PROPERSEARCHER" ) # add trakt.tv checker job self.srScheduler.add_job( self.TRAKTSEARCHER.run, srIntervalTrigger(**{'hours': 1}), name="TRAKTSEARCHER", id="TRAKTSEARCHER" ) # add subtitles finder job self.srScheduler.add_job( self.SUBTITLESEARCHER.run, srIntervalTrigger(**{'hours': self.srConfig.SUBTITLE_SEARCHER_FREQ}), name="SUBTITLESEARCHER", id="SUBTITLESEARCHER" ) # start scheduler service self.srScheduler.start() # Pause/Resume PROPERSEARCHER job (self.srScheduler.get_job('PROPERSEARCHER').pause, self.srScheduler.get_job('PROPERSEARCHER').resume )[self.srConfig.DOWNLOAD_PROPERS]() # Pause/Resume TRAKTSEARCHER job (self.srScheduler.get_job('TRAKTSEARCHER').pause, self.srScheduler.get_job('TRAKTSEARCHER').resume )[self.srConfig.USE_TRAKT]() # Pause/Resume SUBTITLESEARCHER job (self.srScheduler.get_job('SUBTITLESEARCHER').pause, self.srScheduler.get_job('SUBTITLESEARCHER').resume )[self.srConfig.USE_SUBTITLES]() # Pause/Resume POSTPROCESS job (self.srScheduler.get_job('POSTPROCESSOR').pause, self.srScheduler.get_job('POSTPROCESSOR').resume )[self.srConfig.PROCESS_AUTOMATICALLY]() # start webserver self.srWebServer.start() # start ioloop event handler IOLoop.instance().start() def shutdown(self, status=None, restart=False): if self.started: 
self.started = False if restart: self.srLogger.info('SiCKRAGE IS PERFORMING A RESTART!') else: self.srLogger.info('SiCKRAGE IS PERFORMING A SHUTDOWN!') # shutdown/restart webserver self.srWebServer.shutdown() # shutdown scheduler self.srLogger.info("Shutting down scheduler") self.srScheduler.shutdown() # shutdown queues self.srLogger.info("Shutting down queues") if self.SHOWQUEUE: self.SHOWQUEUE.shutdown() if self.SEARCHQUEUE: self.SEARCHQUEUE.shutdown() if sickrage.srCore.ADBA_CONNECTION: self.srLogger.info("Logging out ANIDB connection") sickrage.srCore.ADBA_CONNECTION.logout() # save all settings self.save_all() if restart: self.srLogger.info('SiCKRAGE IS RESTARTING!') else: self.srLogger.info('SiCKRAGE IS SHUTDOWN!') # shutdown logging self.srLogger.shutdown() # delete pid file if sickrage.DAEMONIZE: sickrage.delpid(sickrage.PID_FILE) # system exit with status if not restart: sys.exit(status) # stop ioloop event handler IOLoop.current().stop() def save_all(self): # write all shows self.srLogger.info("Saving all shows to the database") for SHOW in self.SHOWLIST: try: SHOW.saveToDB() except: continue # save config self.srConfig.save() def load_shows(self): """ Populates the showlist with shows from the database """ for sqlShow in main_db.MainDB().select("SELECT * FROM tv_shows"): try: curshow = TVShow(int(sqlShow["indexer"]), int(sqlShow["indexer_id"])) self.srLogger.debug("Loading data for show: [{}]".format(curshow.name)) #self.NAMECACHE.buildNameCache(curshow) curshow.nextEpisode() self.SHOWLIST += [curshow] except Exception as e: self.srLogger.error( "There was an error creating the show in {}: {}".format(sqlShow["location"], e.message)) self.srLogger.debug(traceback.format_exc())
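# Core.start above toggles jobs with a tuple-index dispatch: a False flag
# selects index 0 (.pause), True selects index 1 (.resume), and the trailing ()
# calls whichever bound method was chosen. A minimal standalone sketch of that
# idiom; the job id, callback, and download_propers flag are stand-ins, not the
# SiCKRAGE objects.
from apscheduler.schedulers.tornado import TornadoScheduler


def search_propers():
    print("searching propers")


scheduler = TornadoScheduler()
scheduler.add_job(search_propers, 'interval', minutes=15, id='PROPERSEARCHER')
scheduler.start()

download_propers = False  # hypothetical config flag

# pause when the feature is disabled, resume when it is enabled
(scheduler.get_job('PROPERSEARCHER').pause,
 scheduler.get_job('PROPERSEARCHER').resume)[download_propers]()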
class Scheduler: """Class encapsulating scheduling logic. """ def __init__(self, redis_args, channel_name, postgres_url): self.redis_args = redis_args self.channel_name = channel_name self.scheduler = TornadoScheduler({ 'apscheduler.jobstores.default': { 'type': 'sqlalchemy', 'url': postgres_url } }) # # The main entry-point # async def create_event(self, event_type, context): """Runs the schedule method by name. """ func = getattr(self, event_type) await func(context) # # Shortcuts for scheduler methods # def add(self, date, func, args): """Shortcut for adding new events. """ job = self.scheduler.add_job(func=func, trigger='date', next_run_time=date, args=args) return job def make(self): """Shortcut for running the scheduler workers. """ self.scheduler.start() def remove_job(self, job_id): """Shortcut for removing scheduler job by id. """ job = self.scheduler.get_job(job_id) if job: job.remove() def publish_now(self, message): """Shortcut for publishing message in Redis channel immediately. """ self._notify_interview(message, self.redis_args, self.channel_name) # # Setting scheduler tasks # async def schedule_interview(self, context): """Schedules the events: * remind an hour in advance; * remind in the morning of the event day; * remind in the evening before the event day. """ message = context['message'] interview = context['interview'] interview_date = handler.get_date_from_string(message['start']) args = (message, self.redis_args, self.channel_name) scheduled_dates = self.get_scheduled_dates(interview_date) jobs = [] for scheduled_date in scheduled_dates: job = self.add(date=scheduled_date, func=self._notify_interview, args=args) jobs.append(job.id) await interview.update(jobs=json.dumps(jobs)).apply() async def remove_candidate(self, context): """Removes the candidate in a day after first working day at midnight. """ day_after_fwd = self.get_day_after_fwd(context['employment_date']) self.add(date=day_after_fwd, func=self._remove_candidate, args=(context['candidate_id'], )) # # Functions to be invoked when the date comes # Note that the method should be static since pickle can't serialize self param. # @staticmethod def _notify_interview(message, redis_conn_args, channel_name): conn = FakeStrictRedis() if not redis_conn_args else StrictRedis( **redis_conn_args) conn.publish(channel_name, json.dumps(message)) @staticmethod async def _remove_candidate(candidate_id): await Interview.delete.where(Interview.candidate == candidate_id ).gino.status() candidate = await Candidate.get(candidate_id) await candidate.delete() # # Calculates dates to be used as triggers for scheduler jobs # @staticmethod def get_scheduled_dates(interview_date): """Calculates the dates for notification about incoming interview. """ an_hour_in_advance = interview_date - timedelta(hours=1) morning_of_event_day = interview_date.replace(hour=7, minute=0, second=0) evening_before_event_day = interview_date.replace( hour=18, minute=0, second=0) - timedelta(days=1) return an_hour_in_advance, morning_of_event_day, evening_before_event_day @staticmethod def get_day_after_fwd(fwd_date_string): """Calculates the day after first working day from string. """ fwd_date = list(map(int, fwd_date_string.split('-'))) day_after_fwd = datetime(year=fwd_date[0], month=fwd_date[1], day=fwd_date[2]) + timedelta(days=1) return day_after_fwd
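# The Scheduler class above persists jobs in a SQLAlchemy jobstore so scheduled
# reminders survive restarts, which is also why its callbacks are static or
# module-level (pickle cannot serialize a bound self). A minimal sketch under
# those assumptions; the SQLite URL and notify() payload are illustrative, the
# original passes a PostgreSQL URL and publishes to Redis.
from datetime import datetime, timedelta

from apscheduler.schedulers.tornado import TornadoScheduler
from tornado.ioloop import IOLoop


def notify(payload):
    # module-level callable so the jobstore can pickle a reference to it
    print("reminder: %s" % payload)


scheduler = TornadoScheduler({
    'apscheduler.jobstores.default': {
        'type': 'sqlalchemy',
        'url': 'sqlite:///jobs.sqlite',
    }
})
scheduler.start()

# one-shot 'date' trigger, mirroring Scheduler.add()
scheduler.add_job(func=notify, trigger='date',
                  next_run_time=datetime.now() + timedelta(minutes=1),
                  args=('interview in one hour',))

IOLoop.current().start()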
class Core(object): def __init__(self): self.started = False # process id self.PID = os.getpid() # cpu count self.CPU_COUNT = cpu_count() # generate notifiers dict self.notifiersDict = AttrDict(libnotify=LibnotifyNotifier(), kodi_notifier=KODINotifier(), plex_notifier=PLEXNotifier(), emby_notifier=EMBYNotifier(), nmj_notifier=NMJNotifier(), nmjv2_notifier=NMJv2Notifier(), synoindex_notifier=synoIndexNotifier(), synology_notifier=synologyNotifier(), pytivo_notifier=pyTivoNotifier(), growl_notifier=GrowlNotifier(), prowl_notifier=ProwlNotifier(), libnotify_notifier=LibnotifyNotifier(), pushover_notifier=PushoverNotifier(), boxcar_notifier=BoxcarNotifier(), boxcar2_notifier=Boxcar2Notifier(), nma_notifier=NMA_Notifier(), pushalot_notifier=PushalotNotifier(), pushbullet_notifier=PushbulletNotifier(), freemobile_notifier=FreeMobileNotifier(), twitter_notifier=TwitterNotifier(), trakt_notifier=TraktNotifier(), email_notifier=EmailNotifier()) # generate metadata providers dict self.metadataProviderDict = get_metadata_generator_dict() # generate providers dict self.providersDict = providersDict() # init notification queue self.srNotifications = Notifications() # init logger self.srLogger = srLogger() # init config self.srConfig = srConfig() # init scheduler service self.srScheduler = TornadoScheduler() # init web server self.srWebServer = srWebServer() # init web client session self.srWebSession = srSession() # google api self.googleAuth = googleAuth() # name cache self.NAMECACHE = srNameCache() # queues self.SHOWQUEUE = srShowQueue() self.SEARCHQUEUE = srSearchQueue() # updaters self.VERSIONUPDATER = srVersionUpdater() self.SHOWUPDATER = srShowUpdater() # searchers self.DAILYSEARCHER = srDailySearcher() self.BACKLOGSEARCHER = srBacklogSearcher() self.PROPERSEARCHER = srProperSearcher() self.TRAKTSEARCHER = srTraktSearcher() self.SUBTITLESEARCHER = srSubtitleSearcher() # auto postprocessor self.AUTOPOSTPROCESSOR = srPostProcessor() # sickrage version self.NEWEST_VERSION = None self.NEWEST_VERSION_STRING = None # anidb connection self.ADBA_CONNECTION = None # show list self.SHOWLIST = [] def start(self): self.started = True # thread name threading.currentThread().setName('CORE') # Check if we need to perform a restore first if os.path.exists( os.path.abspath(os.path.join(sickrage.DATA_DIR, 'restore'))): success = restoreSR( os.path.abspath(os.path.join(sickrage.DATA_DIR, 'restore')), sickrage.DATA_DIR) print("Restoring SiCKRAGE backup: %s!\n" % ("FAILED", "SUCCESSFUL")[success]) if success: shutil.rmtree(os.path.abspath( os.path.join(sickrage.DATA_DIR, 'restore')), ignore_errors=True) # migrate old database file names to new ones if os.path.isfile( os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sickbeard.db'))): if os.path.isfile(os.path.join(sickrage.DATA_DIR, 'sickrage.db')): helpers.moveFile( os.path.join(sickrage.DATA_DIR, 'sickrage.db'), os.path.join( sickrage.DATA_DIR, '{}.bak-{}'.format( 'sickrage.db', datetime.datetime.now().strftime( '%Y%m%d_%H%M%S')))) helpers.moveFile( os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sickbeard.db')), os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sickrage.db'))) # load config self.srConfig.load() # set socket timeout socket.setdefaulttimeout(self.srConfig.SOCKET_TIMEOUT) # setup logger settings self.srLogger.logSize = self.srConfig.LOG_SIZE self.srLogger.logNr = self.srConfig.LOG_NR self.srLogger.debugLogging = sickrage.DEBUG self.srLogger.consoleLogging = not sickrage.QUITE self.srLogger.logFile = self.srConfig.LOG_FILE # start logger 
self.srLogger.start() # Check available space try: total_space, available_space = getFreeSpace(sickrage.DATA_DIR) if available_space < 100: self.srLogger.error( 'Shutting down as SiCKRAGE needs some space to work. You\'ll get corrupted data otherwise. Only %sMB left', available_space) sickrage.restart = False return except: self.srLogger.error('Failed getting diskspace: %s', traceback.format_exc()) # perform database startup actions for db in [MainDB, CacheDB, FailedDB]: # initialize the database db().initialize() # migrate the database db().migrate() # compact the main database db().compact() # load data for shows from database self.load_shows() # build name cache self.NAMECACHE.build() if self.srConfig.DEFAULT_PAGE not in ('home', 'schedule', 'history', 'news', 'IRC'): self.srConfig.DEFAULT_PAGE = 'home' # cleanup cache folder for folder in ['mako', 'sessions', 'indexers']: try: shutil.rmtree(os.path.join(self.srConfig.CACHE_DIR, folder), ignore_errors=True) except Exception: continue # init anidb connection if not self.srConfig.USE_ANIDB: try: self.ADBA_CONNECTION = adba.Connection( keepAlive=True, log=lambda msg: self.srLogger.debug( "AniDB: %s " % msg)).auth(self.srConfig.ANIDB_USERNAME, self.srConfig.ANIDB_PASSWORD) except Exception as e: self.srLogger.warning("AniDB exception msg: %r " % repr(e)) if self.srConfig.WEB_PORT < 21 or self.srConfig.WEB_PORT > 65535: self.srConfig.WEB_PORT = 8081 if not self.srConfig.WEB_COOKIE_SECRET: self.srConfig.WEB_COOKIE_SECRET = generateCookieSecret() # attempt to help prevent users from breaking links by using a bad url if not self.srConfig.ANON_REDIRECT.endswith('?'): self.srConfig.ANON_REDIRECT = '' if not re.match(r'\d+\|[^|]+(?:\|[^|]+)*', self.srConfig.ROOT_DIRS): self.srConfig.ROOT_DIRS = '' self.srConfig.NAMING_FORCE_FOLDERS = check_force_season_folders() if self.srConfig.NZB_METHOD not in ('blackhole', 'sabnzbd', 'nzbget'): self.srConfig.NZB_METHOD = 'blackhole' if self.srConfig.TORRENT_METHOD not in ('blackhole', 'utorrent', 'transmission', 'deluge', 'deluged', 'download_station', 'rtorrent', 'qbittorrent', 'mlnet', 'putio'): self.srConfig.TORRENT_METHOD = 'blackhole' if self.srConfig.PROPER_SEARCHER_INTERVAL not in ('15m', '45m', '90m', '4h', 'daily'): self.srConfig.PROPER_SEARCHER_INTERVAL = 'daily' if self.srConfig.AUTOPOSTPROCESSOR_FREQ < self.srConfig.MIN_AUTOPOSTPROCESSOR_FREQ: self.srConfig.AUTOPOSTPROCESSOR_FREQ = self.srConfig.MIN_AUTOPOSTPROCESSOR_FREQ if self.srConfig.NAMECACHE_FREQ < self.srConfig.MIN_NAMECACHE_FREQ: self.srConfig.NAMECACHE_FREQ = self.srConfig.MIN_NAMECACHE_FREQ if self.srConfig.DAILY_SEARCHER_FREQ < self.srConfig.MIN_DAILY_SEARCHER_FREQ: self.srConfig.DAILY_SEARCHER_FREQ = self.srConfig.MIN_DAILY_SEARCHER_FREQ self.srConfig.MIN_BACKLOG_SEARCHER_FREQ = get_backlog_cycle_time() if self.srConfig.BACKLOG_SEARCHER_FREQ < self.srConfig.MIN_BACKLOG_SEARCHER_FREQ: self.srConfig.BACKLOG_SEARCHER_FREQ = self.srConfig.MIN_BACKLOG_SEARCHER_FREQ if self.srConfig.VERSION_UPDATER_FREQ < self.srConfig.MIN_VERSION_UPDATER_FREQ: self.srConfig.VERSION_UPDATER_FREQ = self.srConfig.MIN_VERSION_UPDATER_FREQ if self.srConfig.SHOWUPDATE_HOUR > 23: self.srConfig.SHOWUPDATE_HOUR = 0 elif self.srConfig.SHOWUPDATE_HOUR < 0: self.srConfig.SHOWUPDATE_HOUR = 0 if self.srConfig.SUBTITLE_SEARCHER_FREQ < self.srConfig.MIN_SUBTITLE_SEARCHER_FREQ: self.srConfig.SUBTITLE_SEARCHER_FREQ = self.srConfig.MIN_SUBTITLE_SEARCHER_FREQ self.srConfig.NEWS_LATEST = self.srConfig.NEWS_LAST_READ if self.srConfig.SUBTITLES_LANGUAGES[0] == '': 
self.srConfig.SUBTITLES_LANGUAGES = [] # initialize metadata_providers for cur_metadata_tuple in [ (self.srConfig.METADATA_KODI, kodi), (self.srConfig.METADATA_KODI_12PLUS, kodi_12plus), (self.srConfig.METADATA_MEDIABROWSER, mediabrowser), (self.srConfig.METADATA_PS3, ps3), (self.srConfig.METADATA_WDTV, wdtv), (self.srConfig.METADATA_TIVO, tivo), (self.srConfig.METADATA_MEDE8ER, mede8er) ]: (cur_metadata_config, cur_metadata_class) = cur_metadata_tuple tmp_provider = cur_metadata_class.metadata_class() tmp_provider.set_config(cur_metadata_config) self.metadataProviderDict[tmp_provider.name] = tmp_provider # add version checker job self.srScheduler.add_job( self.VERSIONUPDATER.run, srIntervalTrigger( **{ 'hours': self.srConfig.VERSION_UPDATER_FREQ, 'min': self.srConfig.MIN_VERSION_UPDATER_FREQ }), name="VERSIONUPDATER", id="VERSIONUPDATER") # add network timezones updater job self.srScheduler.add_job(update_network_dict, srIntervalTrigger(**{'days': 1}), name="TZUPDATER", id="TZUPDATER") # add namecache updater job self.srScheduler.add_job( self.NAMECACHE.run, srIntervalTrigger( **{ 'minutes': self.srConfig.NAMECACHE_FREQ, 'min': self.srConfig.MIN_NAMECACHE_FREQ }), name="NAMECACHE", id="NAMECACHE") # add show updater job self.srScheduler.add_job( self.SHOWUPDATER.run, srIntervalTrigger( **{ 'hours': 1, 'start_date': datetime.datetime.now().replace( hour=self.srConfig.SHOWUPDATE_HOUR) }), name="SHOWUPDATER", id="SHOWUPDATER") # add daily search job self.srScheduler.add_job( self.DAILYSEARCHER.run, srIntervalTrigger( **{ 'minutes': self.srConfig.DAILY_SEARCHER_FREQ, 'min': self.srConfig.MIN_DAILY_SEARCHER_FREQ }), name="DAILYSEARCHER", id="DAILYSEARCHER") # add backlog search job self.srScheduler.add_job( self.BACKLOGSEARCHER.run, srIntervalTrigger( **{ 'minutes': self.srConfig.BACKLOG_SEARCHER_FREQ, 'min': self.srConfig.MIN_BACKLOG_SEARCHER_FREQ }), name="BACKLOG", id="BACKLOG") # add auto-postprocessing job self.srScheduler.add_job( self.AUTOPOSTPROCESSOR.run, srIntervalTrigger( **{ 'minutes': self.srConfig.AUTOPOSTPROCESSOR_FREQ, 'min': self.srConfig.MIN_AUTOPOSTPROCESSOR_FREQ }), name="POSTPROCESSOR", id="POSTPROCESSOR") # add find proper job self.srScheduler.add_job( self.PROPERSEARCHER.run, srIntervalTrigger( **{ 'minutes': { '15m': 15, '45m': 45, '90m': 90, '4h': 4 * 60, 'daily': 24 * 60 }[self.srConfig.PROPER_SEARCHER_INTERVAL] }), name="PROPERSEARCHER", id="PROPERSEARCHER") # add trakt.tv checker job self.srScheduler.add_job(self.TRAKTSEARCHER.run, srIntervalTrigger(**{'hours': 1}), name="TRAKTSEARCHER", id="TRAKTSEARCHER") # add subtitles finder job self.srScheduler.add_job( self.SUBTITLESEARCHER.run, srIntervalTrigger( **{'hours': self.srConfig.SUBTITLE_SEARCHER_FREQ}), name="SUBTITLESEARCHER", id="SUBTITLESEARCHER") # start scheduler service self.srScheduler.start() # Pause/Resume PROPERSEARCHER job (self.srScheduler.get_job('PROPERSEARCHER').pause, self.srScheduler.get_job('PROPERSEARCHER').resume )[self.srConfig.DOWNLOAD_PROPERS]() # Pause/Resume TRAKTSEARCHER job (self.srScheduler.get_job('TRAKTSEARCHER').pause, self.srScheduler.get_job('TRAKTSEARCHER').resume )[self.srConfig.USE_TRAKT]() # Pause/Resume SUBTITLESEARCHER job (self.srScheduler.get_job('SUBTITLESEARCHER').pause, self.srScheduler.get_job('SUBTITLESEARCHER').resume )[self.srConfig.USE_SUBTITLES]() # Pause/Resume POSTPROCESS job (self.srScheduler.get_job('POSTPROCESSOR').pause, self.srScheduler.get_job('POSTPROCESSOR').resume )[self.srConfig.PROCESS_AUTOMATICALLY]() # start queue's self.SEARCHQUEUE.start() 
self.SHOWQUEUE.start() # start webserver self.srWebServer.start() # start ioloop event handler IOLoop.current().start() def shutdown(self): if self.started: self.started = False self.srLogger.info('SiCKRAGE IS SHUTTING DOWN!!!') # shutdown/restart webserver self.srWebServer.shutdown() # shutdown scheduler self.srLogger.info("Shutting down scheduler") self.srScheduler.shutdown() # shutdown show queue if self.SHOWQUEUE: self.srLogger.info("Shutting down show queue") self.SHOWQUEUE.shutdown() # shutdown search queue if self.SEARCHQUEUE: self.srLogger.info("Shutting down search queue") self.SEARCHQUEUE.shutdown() # log out of ADBA if sickrage.srCore.ADBA_CONNECTION: self.srLogger.info("Logging out ANIDB connection") sickrage.srCore.ADBA_CONNECTION.logout() # save all show and config settings self.save_all() # shutdown logging self.srLogger.shutdown() # delete pid file if sickrage.DAEMONIZE: sickrage.delpid(sickrage.PID_FILE) def save_all(self): # write all shows self.srLogger.info("Saving all shows to the database") for SHOW in self.SHOWLIST: try: SHOW.saveToDB() except: continue # save config self.srConfig.save() def load_shows(self): """ Populates the showlist with shows from the database """ for dbData in [ x['doc'] for x in MainDB().db.all('tv_shows', with_doc=True) ]: try: self.srLogger.debug("Loading data for show: [%s]", dbData['show_name']) show = TVShow(int(dbData['indexer']), int(dbData['indexer_id'])) show.nextEpisode() self.SHOWLIST += [show] except Exception as e: self.srLogger.error("Show error in [%s]: %s" % (dbData['location'], e.message))
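# Before registering jobs, Core.start clamps every configured frequency to a
# minimum (e.g. DAILY_SEARCHER_FREQ vs MIN_DAILY_SEARCHER_FREQ). A standalone
# sketch of that pattern; the numbers are illustrative, and srIntervalTrigger's
# `min=` kwarg is SiCKRAGE-specific, so a stock APScheduler IntervalTrigger is
# used here instead.
from apscheduler.schedulers.tornado import TornadoScheduler
from apscheduler.triggers.interval import IntervalTrigger


def daily_search():
    print("running daily search")


MIN_DAILY_SEARCHER_FREQ = 10  # minutes, assumed floor
daily_searcher_freq = 2       # minutes, assumed (too low) user setting

# clamp to the allowed minimum, as Core.start does for each frequency
if daily_searcher_freq < MIN_DAILY_SEARCHER_FREQ:
    daily_searcher_freq = MIN_DAILY_SEARCHER_FREQ

scheduler = TornadoScheduler()
scheduler.add_job(daily_search, IntervalTrigger(minutes=daily_searcher_freq),
                  name="DAILYSEARCHER", id="DAILYSEARCHER")
scheduler.start()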
class Core(object): def __init__(self): self.started = False self.io_loop = IOLoop.current() # process id self.PID = os.getpid() # generate notifiers dict self.notifiersDict = notifiersDict() # generate metadata providers dict self.metadataProvidersDict = metadataProvidersDict() # generate providers dict self.providersDict = providersDict() # init notification queue self.srNotifications = Notifications() # init logger self.srLogger = srLogger() # init config self.srConfig = srConfig() # init databases self.mainDB = MainDB() self.cacheDB = CacheDB() self.failedDB = FailedDB() # init scheduler service self.srScheduler = TornadoScheduler() # init web server self.srWebServer = srWebServer() # init web client session self.srWebSession = srSession() # google api self.googleAuth = googleAuth() # name cache self.NAMECACHE = srNameCache() # queues self.SHOWQUEUE = srShowQueue() self.SEARCHQUEUE = srSearchQueue() # updaters self.VERSIONUPDATER = srVersionUpdater() self.SHOWUPDATER = srShowUpdater() # searchers self.DAILYSEARCHER = srDailySearcher() self.BACKLOGSEARCHER = srBacklogSearcher() self.PROPERSEARCHER = srProperSearcher() self.TRAKTSEARCHER = srTraktSearcher() self.SUBTITLESEARCHER = srSubtitleSearcher() # auto postprocessor self.AUTOPOSTPROCESSOR = srPostProcessor() # sickrage version self.NEWEST_VERSION = None self.NEWEST_VERSION_STRING = None # anidb connection self.ADBA_CONNECTION = None # show list self.SHOWLIST = [] def start(self): self.started = True # thread name threading.currentThread().setName('CORE') # Check if we need to perform a restore first if os.path.exists( os.path.abspath(os.path.join(sickrage.DATA_DIR, 'restore'))): success = restoreSR( os.path.abspath(os.path.join(sickrage.DATA_DIR, 'restore')), sickrage.DATA_DIR) print("Restoring SiCKRAGE backup: %s!\n" % ("FAILED", "SUCCESSFUL")[success]) if success: shutil.rmtree(os.path.abspath( os.path.join(sickrage.DATA_DIR, 'restore')), ignore_errors=True) # migrate old database file names to new ones if os.path.isfile( os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sickbeard.db'))): if os.path.isfile(os.path.join(sickrage.DATA_DIR, 'sickrage.db')): helpers.moveFile( os.path.join(sickrage.DATA_DIR, 'sickrage.db'), os.path.join( sickrage.DATA_DIR, '{}.bak-{}'.format( 'sickrage.db', datetime.datetime.now().strftime( '%Y%m%d_%H%M%S')))) helpers.moveFile( os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sickbeard.db')), os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sickrage.db'))) # load config self.srConfig.load() # set socket timeout socket.setdefaulttimeout(self.srConfig.SOCKET_TIMEOUT) # setup logger settings self.srLogger.logSize = self.srConfig.LOG_SIZE self.srLogger.logNr = self.srConfig.LOG_NR self.srLogger.logFile = self.srConfig.LOG_FILE self.srLogger.debugLogging = sickrage.DEBUG self.srLogger.consoleLogging = not sickrage.QUITE # start logger self.srLogger.start() # Check available space try: total_space, available_space = getFreeSpace(sickrage.DATA_DIR) if available_space < 100: self.srLogger.error( 'Shutting down as SiCKRAGE needs some space to work. You\'ll get corrupted data otherwise. 
Only %sMB left', available_space) sickrage.restart = False return except: self.srLogger.error('Failed getting diskspace: %s', traceback.format_exc()) # perform database startup actions for db in [self.mainDB, self.cacheDB, self.failedDB]: # initialize database db.initialize() # check integrity of database db.check_integrity() # migrate database db.migrate() # misc database cleanups db.cleanup() # compact main database if not sickrage.DEVELOPER and self.srConfig.LAST_DB_COMPACT < time.time( ) - 604800: # 7 days self.mainDB.compact() self.srConfig.LAST_DB_COMPACT = int(time.time()) # load data for shows from database self.load_shows() if self.srConfig.DEFAULT_PAGE not in ('home', 'schedule', 'history', 'news', 'IRC'): self.srConfig.DEFAULT_PAGE = 'home' # cleanup cache folder for folder in ['mako', 'sessions', 'indexers']: try: shutil.rmtree(os.path.join(sickrage.CACHE_DIR, folder), ignore_errors=True) except Exception: continue # init anidb connection if self.srConfig.USE_ANIDB: try: self.ADBA_CONNECTION = adba.Connection( keepAlive=True, log=lambda msg: self.srLogger.debug( "AniDB: %s " % msg)).auth(self.srConfig.ANIDB_USERNAME, self.srConfig.ANIDB_PASSWORD) except Exception as e: self.srLogger.warning("AniDB exception msg: %r " % repr(e)) if self.srConfig.WEB_PORT < 21 or self.srConfig.WEB_PORT > 65535: self.srConfig.WEB_PORT = 8081 if not self.srConfig.WEB_COOKIE_SECRET: self.srConfig.WEB_COOKIE_SECRET = generateCookieSecret() # attempt to help prevent users from breaking links by using a bad url if not self.srConfig.ANON_REDIRECT.endswith('?'): self.srConfig.ANON_REDIRECT = '' if not re.match(r'\d+\|[^|]+(?:\|[^|]+)*', self.srConfig.ROOT_DIRS): self.srConfig.ROOT_DIRS = '' self.srConfig.NAMING_FORCE_FOLDERS = check_force_season_folders() if self.srConfig.NZB_METHOD not in ('blackhole', 'sabnzbd', 'nzbget'): self.srConfig.NZB_METHOD = 'blackhole' if self.srConfig.TORRENT_METHOD not in ('blackhole', 'utorrent', 'transmission', 'deluge', 'deluged', 'download_station', 'rtorrent', 'qbittorrent', 'mlnet', 'putio'): self.srConfig.TORRENT_METHOD = 'blackhole' if self.srConfig.PROPER_SEARCHER_INTERVAL not in ('15m', '45m', '90m', '4h', 'daily'): self.srConfig.PROPER_SEARCHER_INTERVAL = 'daily' if self.srConfig.AUTOPOSTPROCESSOR_FREQ < self.srConfig.MIN_AUTOPOSTPROCESSOR_FREQ: self.srConfig.AUTOPOSTPROCESSOR_FREQ = self.srConfig.MIN_AUTOPOSTPROCESSOR_FREQ if self.srConfig.NAMECACHE_FREQ < self.srConfig.MIN_NAMECACHE_FREQ: self.srConfig.NAMECACHE_FREQ = self.srConfig.MIN_NAMECACHE_FREQ if self.srConfig.DAILY_SEARCHER_FREQ < self.srConfig.MIN_DAILY_SEARCHER_FREQ: self.srConfig.DAILY_SEARCHER_FREQ = self.srConfig.MIN_DAILY_SEARCHER_FREQ self.srConfig.MIN_BACKLOG_SEARCHER_FREQ = self.BACKLOGSEARCHER.get_backlog_cycle_time( ) if self.srConfig.BACKLOG_SEARCHER_FREQ < self.srConfig.MIN_BACKLOG_SEARCHER_FREQ: self.srConfig.BACKLOG_SEARCHER_FREQ = self.srConfig.MIN_BACKLOG_SEARCHER_FREQ if self.srConfig.VERSION_UPDATER_FREQ < self.srConfig.MIN_VERSION_UPDATER_FREQ: self.srConfig.VERSION_UPDATER_FREQ = self.srConfig.MIN_VERSION_UPDATER_FREQ if self.srConfig.SHOWUPDATE_HOUR > 23: self.srConfig.SHOWUPDATE_HOUR = 0 elif self.srConfig.SHOWUPDATE_HOUR < 0: self.srConfig.SHOWUPDATE_HOUR = 0 if self.srConfig.SUBTITLE_SEARCHER_FREQ < self.srConfig.MIN_SUBTITLE_SEARCHER_FREQ: self.srConfig.SUBTITLE_SEARCHER_FREQ = self.srConfig.MIN_SUBTITLE_SEARCHER_FREQ if self.srConfig.SUBTITLES_LANGUAGES[0] == '': self.srConfig.SUBTITLES_LANGUAGES = [] # add version checker job self.srScheduler.add_job( self.VERSIONUPDATER.run, 
srIntervalTrigger( **{ 'hours': self.srConfig.VERSION_UPDATER_FREQ, 'min': self.srConfig.MIN_VERSION_UPDATER_FREQ }), name="VERSIONUPDATER", id="VERSIONUPDATER") # add network timezones updater job self.srScheduler.add_job(update_network_dict, srIntervalTrigger(**{'days': 1}), name="TZUPDATER", id="TZUPDATER") # add show updater job self.srScheduler.add_job( self.SHOWUPDATER.run, srIntervalTrigger( **{ 'days': 1, 'start_date': datetime.datetime.now().replace( hour=self.srConfig.SHOWUPDATE_HOUR) }), name="SHOWUPDATER", id="SHOWUPDATER") # add show next episode job self.srScheduler.add_job(self.SHOWUPDATER.nextEpisode, srIntervalTrigger(**{'hours': 1}), name="SHOWNEXTEP", id="SHOWNEXTEP") # add daily search job self.srScheduler.add_job(self.DAILYSEARCHER.run, srIntervalTrigger( **{ 'minutes': self.srConfig.DAILY_SEARCHER_FREQ, 'min': self.srConfig.MIN_DAILY_SEARCHER_FREQ, 'start_date': datetime.datetime.now() + datetime.timedelta(minutes=4) }), name="DAILYSEARCHER", id="DAILYSEARCHER") # add backlog search job self.srScheduler.add_job( self.BACKLOGSEARCHER.run, srIntervalTrigger( **{ 'minutes': self.srConfig.BACKLOG_SEARCHER_FREQ, 'min': self.srConfig.MIN_BACKLOG_SEARCHER_FREQ, 'start_date': datetime.datetime.now() + datetime.timedelta(minutes=30) }), name="BACKLOG", id="BACKLOG") # add auto-postprocessing job self.srScheduler.add_job( self.AUTOPOSTPROCESSOR.run, srIntervalTrigger( **{ 'minutes': self.srConfig.AUTOPOSTPROCESSOR_FREQ, 'min': self.srConfig.MIN_AUTOPOSTPROCESSOR_FREQ }), name="POSTPROCESSOR", id="POSTPROCESSOR") # add find proper job self.srScheduler.add_job( self.PROPERSEARCHER.run, srIntervalTrigger( **{ 'minutes': { '15m': 15, '45m': 45, '90m': 90, '4h': 4 * 60, 'daily': 24 * 60 }[self.srConfig.PROPER_SEARCHER_INTERVAL] }), name="PROPERSEARCHER", id="PROPERSEARCHER") # add trakt.tv checker job self.srScheduler.add_job(self.TRAKTSEARCHER.run, srIntervalTrigger(**{'hours': 1}), name="TRAKTSEARCHER", id="TRAKTSEARCHER") # add subtitles finder job self.srScheduler.add_job( self.SUBTITLESEARCHER.run, srIntervalTrigger( **{'hours': self.srConfig.SUBTITLE_SEARCHER_FREQ}), name="SUBTITLESEARCHER", id="SUBTITLESEARCHER") # start scheduler service self.srScheduler.start() # Pause/Resume PROPERSEARCHER job (self.srScheduler.get_job('PROPERSEARCHER').pause, self.srScheduler.get_job('PROPERSEARCHER').resume )[self.srConfig.DOWNLOAD_PROPERS]() # Pause/Resume TRAKTSEARCHER job (self.srScheduler.get_job('TRAKTSEARCHER').pause, self.srScheduler.get_job('TRAKTSEARCHER').resume )[self.srConfig.USE_TRAKT]() # Pause/Resume SUBTITLESEARCHER job (self.srScheduler.get_job('SUBTITLESEARCHER').pause, self.srScheduler.get_job('SUBTITLESEARCHER').resume )[self.srConfig.USE_SUBTITLES]() # Pause/Resume POSTPROCESS job (self.srScheduler.get_job('POSTPROCESSOR').pause, self.srScheduler.get_job('POSTPROCESSOR').resume )[self.srConfig.PROCESS_AUTOMATICALLY]() # start queue's self.SEARCHQUEUE.start() self.SHOWQUEUE.start() # start webserver self.srWebServer.start() self.srLogger.info("SiCKRAGE :: STARTED") self.srLogger.info("SiCKRAGE :: VERSION:[{}]".format( self.VERSIONUPDATER.version)) self.srLogger.info("SiCKRAGE :: CONFIG:[{}] [v{}]".format( sickrage.CONFIG_FILE, self.srConfig.CONFIG_VERSION)) self.srLogger.info("SiCKRAGE :: URL:[{}://{}:{}/]".format( ('http', 'https')[self.srConfig.ENABLE_HTTPS], self.srConfig.WEB_HOST, self.srConfig.WEB_PORT)) # launch browser window if all( [not sickrage.NOLAUNCH, sickrage.srCore.srConfig.LAUNCH_BROWSER]): threading.Thread( None, lambda: launch_browser( ('http', 
'https')[sickrage.srCore.srConfig.ENABLE_HTTPS], self.srConfig.WEB_HOST, sickrage.srCore.srConfig.WEB_PORT) ).start() # start ioloop event handler self.io_loop.start() def shutdown(self): if self.started: self.started = False self.srLogger.info('SiCKRAGE IS SHUTTING DOWN!!!') # shutdown/restart webserver self.srWebServer.shutdown() # shutdown show queue if self.SHOWQUEUE: self.srLogger.debug("Shutting down show queue") self.SHOWQUEUE.shutdown() # shutdown search queue if self.SEARCHQUEUE: self.srLogger.debug("Shutting down search queue") self.SEARCHQUEUE.shutdown() # log out of ADBA if sickrage.srCore.ADBA_CONNECTION: self.srLogger.debug("Logging out ANIDB connection") sickrage.srCore.ADBA_CONNECTION.logout() # save all show and config settings self.save_all() # close databases for db in [self.mainDB, self.cacheDB, self.failedDB]: db.close() # shutdown logging self.srLogger.close() # stop daemon process if not sickrage.restart and sickrage.daemon: sickrage.daemon.stop() def save_all(self): # write all shows self.srLogger.info("Saving all shows to the database") for SHOW in self.SHOWLIST: try: SHOW.saveToDB() except: continue # save config self.srConfig.save() def load_shows(self): """ Populates the showlist with shows from the database """ self.NAMECACHE.load() for dbData in [ x['doc'] for x in self.mainDB.db.all('tv_shows', with_doc=True) ]: try: self.srLogger.debug("Loading data for show: [%s]", dbData['show_name']) show = TVShow(int(dbData['indexer']), int(dbData['indexer_id'])) if not sickrage.DEVELOPER: show.nextEpisode() self.NAMECACHE.build(show) self.SHOWLIST += [show] except Exception as e: self.srLogger.error("Show error in [%s]: %s" % (dbData['location'], e.message))
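# Core.shutdown stops components in dependency order: web server first, then
# the scheduler and queues, then it saves state, and only then stops the IOLoop
# that start() blocked on. A minimal sketch of that ordering; save_state() is a
# stand-in for the SiCKRAGE show/config persistence and queue shutdown calls.
from apscheduler.schedulers.tornado import TornadoScheduler
from tornado.ioloop import IOLoop


def save_state():
    print("saving state")  # placeholder for anything that must survive restart


def shutdown(scheduler):
    scheduler.shutdown(wait=False)  # stop firing jobs (wait=True would drain them)
    save_state()                    # persist state while the process is still up
    IOLoop.current().stop()         # finally unblock the start() call below


scheduler = TornadoScheduler()
scheduler.start()
IOLoop.current().add_callback(shutdown, scheduler)
IOLoop.current().start()  # returns once shutdown() stops the loop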
class SchedulerManager(): def __init__(self, config=None, syncobj=None): if config is None: config = Config() self.config = config executors = { 'default': ThreadPoolExecutor(20), 'processpool': ProcessPoolExecutor(5) } self.scheduler = TornadoScheduler(executors=executors) self.task_queue = Queue() self.poll_task_queue_callback = None self.pool_task_queue_interval = 10 self.ioloop = IOLoop.instance() self.poll_task_queue_callback = PeriodicCallback( self.poll_task_queue, self.pool_task_queue_interval * 1000) self.clear_finished_jobs_callback = PeriodicCallback( self.clear_finished_jobs, 60 * 1000) self.reset_timeout_job_callback = PeriodicCallback( self.reset_timeout_job, 10 * 1000) self.sync_obj = syncobj if syncobj is not None: self.sync_obj.set_on_remove_schedule_job( self.on_cluster_remove_scheduling_job) self.sync_obj.set_on_add_schedule_job( self.on_cluster_add_scheduling_job) def init(self): session = Session() # move completed jobs into history for job in session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.status.in_((2, 3))): historical_job = HistoricalJob() historical_job.id = job.id historical_job.spider_id = job.spider_id historical_job.project_name = job.project_name historical_job.spider_name = job.spider_name historical_job.fire_time = job.fire_time historical_job.start_time = job.start_time historical_job.complete_time = job.update_time historical_job.status = job.status session.delete(job) session.add(historical_job) session.commit() # init triggers triggers = session.query(Trigger) for trigger in triggers: try: self.add_job(trigger.id, trigger.cron_pattern) except InvalidCronExpression: logger.warning('Trigger %d,%s cannot be added ' % (trigger.id, trigger.cron_pattern)) session.close() self.scheduler.start() self.poll_task_queue_callback.start() self.clear_finished_jobs_callback.start() self.reset_timeout_job_callback.start() def poll_task_queue(self): if self.task_queue.empty(): with session_scope() as session: tasks_to_run = session.query(SpiderExecutionQueue).filter_by( status=0).order_by(SpiderExecutionQueue.update_time).slice( 0, 10) for task in tasks_to_run: self.task_queue.put(task) def build_cron_trigger(self, cron): cron_parts = cron.split(' ') if len(cron_parts) != 5: raise InvalidCronExpression() try: crontrigger = CronTrigger( minute=cron_parts[0], hour=cron_parts[1], day=cron_parts[2], month=cron_parts[3], day_of_week=cron_parts[4], ) return crontrigger except ValueError: raise InvalidCronExpression() def add_job(self, trigger_id, cron): logger.debug('adding trigger %s %s' % (trigger_id, cron)) crontrigger = self.build_cron_trigger(cron) job = self.scheduler.add_job(func=self.trigger_fired, trigger=crontrigger, kwargs={'trigger_id': trigger_id}, id=str(trigger_id), replace_existing=True) if self.sync_obj: self.ioloop.call_later(0, self.sync_obj.add_schedule_job, trigger_id) #self.sync_obj.add_schedule_job(trigger_id) def on_cluster_remove_scheduling_job(self, job_id): logger.debug('on_cluster_remove_scheduling_job') if self.scheduler.get_job(job_id): self.scheduler.remove_job(job_id) def on_cluster_add_scheduling_job(self, trigger_id): logger.debug('on_cluster_add_scheduling_job') with session_scope() as session: trigger = session.query(Trigger).filter_by(id=trigger_id).first() if trigger is None: return crontrigger = self.build_cron_trigger(trigger.cron_pattern) job = self.scheduler.add_job(func=self.trigger_fired, trigger=crontrigger, kwargs={'trigger_id': trigger_id}, id=str(trigger_id), replace_existing=True) def trigger_fired(self, 
trigger_id): with session_scope() as session: trigger = session.query(Trigger).filter_by(id=trigger_id).first() if not trigger: logger.error('Trigger %s not found.' % trigger_id) return spider = session.query(Spider).filter_by( id=trigger.spider_id).first() project = session.query(Project).filter_by( id=spider.project_id).first() executing = session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.spider_id == spider.id, SpiderExecutionQueue.status.in_([0, 1])) concurrency_setting = session.query(SpiderSettings).filter_by( spider_id=spider.id, setting_key='concurrency').first() concurrency = int( concurrency_setting.value) if concurrency_setting else 1 executing_slots = [ executing_job.slot for executing_job in executing ] free_slots = [ x for x in range(1, concurrency + 1) if x not in executing_slots ] if not free_slots: logger.warning( 'spider %s-%s is configured as %d concurency, and %d in queue, skipping' % (project.name, spider.name, concurrency, len(executing_slots))) return executing = SpiderExecutionQueue() executing.id = generate_job_id() executing.spider_id = spider.id executing.project_name = project.name executing.spider_name = spider.name executing.fire_time = datetime.datetime.now() executing.update_time = datetime.datetime.now() executing.slot = free_slots[0] session.add(executing) try: session.commit() except (Exception, IntegrityError) as e: logger.warning(e) session.close() return def add_schedule(self, project, spider, cron): with session_scope() as session: triggers = session.query(Trigger).filter( Trigger.spider_id == spider.id) found = False for trigger in triggers: if trigger.cron_pattern == cron: found = True break if not found: # create a cron_trigger for just validating cron_trigger = self.build_cron_trigger(cron) trigger = Trigger() trigger.spider_id = spider.id trigger.cron_pattern = cron session.add(trigger) session.commit() self.add_job(trigger.id, cron) def add_task(self, project_name, spider_name): session = Session() project = session.query(Project).filter( Project.name == project_name).first() spider = session.query(Spider).filter( Spider.name == spider_name, Spider.project_id == project.id).first() try: existing = list( session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.spider_id == spider.id, SpiderExecutionQueue.status.in_([0, 1]))) if existing: logger.warning('job %s_%s is running, ignoring schedule' % (project.name, spider.name)) raise JobRunning(existing[0].id) executing = SpiderExecutionQueue() jobid = generate_job_id() executing.id = jobid executing.spider_id = spider.id executing.project_name = project.name executing.spider_name = spider.name executing.fire_time = datetime.datetime.now() executing.update_time = datetime.datetime.now() session.add(executing) session.commit() session.refresh(executing) return executing finally: session.close() def on_node_expired(self, node_id): session = Session() for job in session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.node_id == node_id, SpiderExecutionQueue.status == 1): job.status = 0 job.update_time = datetime.datetime.now() job.start_time = None job.pid = None job.node_id = None session.add(job) session.commit() session.close() def jobs(self): session = Session() pending = list( session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.status == 0)) running = list( session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.status == 1)) finished = list(session.query(HistoricalJob).slice(0, 100)) session.close() return pending, running, finished def 
job_start(self, jobid, pid): with session_scope() as session: job = session.query(SpiderExecutionQueue).filter_by( id=jobid).first() if job.start_time is None: job.start_time = datetime.datetime.now() job.update_time = datetime.datetime.now() if job.pid is None and pid: job.pid = pid session.add(job) session.commit() session.close() def get_next_task(self, node_id): if not self.task_queue.empty(): session = Session() try: next_task = self.task_queue.get_nowait() except Empty: return None next_task = session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.id == next_task.id, SpiderExecutionQueue.status == 0).first() if not next_task: return None next_task.start_time = datetime.datetime.now() next_task.update_time = datetime.datetime.now() next_task.node_id = node_id next_task.status = 1 session.add(next_task) session.commit() session.refresh(next_task) session.close() return next_task return None def has_task(self): return not self.task_queue.empty() def jobs_running(self, node_id, job_ids): ''' :param node_id: :param job_ids: :return:(job_id) to kill ''' with session_scope() as session: for job_id in job_ids: job = session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.id == job_id).first() if job: if job.node_id is None: job.node_id = node_id if job.node_id != node_id or \ job.status != 1: yield job.id else: job.update_time = datetime.datetime.now() session.add(job) else: yield job_id session.commit() def job_finished(self, job, log_file=None, items_file=None): session = Session() if job.status not in (2, 3): raise Exception('Invliad status.') job_status = job.status job = session.query(SpiderExecutionQueue).filter_by(id=job.id).first() job.status = job_status job.update_time = datetime.datetime.now() historical_job = HistoricalJob() historical_job.id = job.id historical_job.spider_id = job.spider_id historical_job.project_name = job.project_name historical_job.spider_name = job.spider_name historical_job.fire_time = job.fire_time historical_job.start_time = job.start_time historical_job.complete_time = job.update_time historical_job.status = job.status if log_file: historical_job.log_file = log_file import re items_crawled_pattern = re.compile( "\'item_scraped_count\': (\d+),") error_log_pattern = re.compile("\'log_count/ERROR\': (\d+),") warning_log_pattern = re.compile("\'log_count/WARNING\': (\d+),") with open(log_file, 'r') as f: log_content = f.read() m = items_crawled_pattern.search(log_content) if m: historical_job.items_count = int(m.group(1)) m = error_log_pattern.search(log_content) if m and historical_job.status == JOB_STATUS_SUCCESS: historical_job.status = JOB_STATUS_FAIL m = warning_log_pattern.search(log_content) if m and historical_job.status == JOB_STATUS_SUCCESS: historical_job.status = JOB_STATUS_WARNING if items_file: historical_job.items_file = items_file session.delete(job) session.add(historical_job) session.commit() session.refresh(historical_job) # send mail if historical_job.status == JOB_STATUS_FAIL: self.try_send_job_failed_mail(historical_job) session.close() return historical_job def try_send_job_failed_mail(self, job): logger.debug('try_send_job_failed_mail') job_fail_send_mail = self.config.getboolean('job_fail_send_mail') if job_fail_send_mail: try: mail_sender = MailSender(self.config) subject = 'scrapydd job failed' to_address = self.config.get('job_fail_mail_receiver') content = 'bot:%s \r\nspider:%s \r\n job_id:%s \r\n' % ( job.spider.project.name, job.spider_name, job.id) mail_sender.send(to_addresses=to_address, subject=subject, 
content=content) except Exception as e: logger.error('Error when sending job_fail mail %s' % e) def clear_finished_jobs(self): job_history_limit_each_spider = 100 with session_scope() as session: spiders = list(session.query(distinct(HistoricalJob.spider_id))) for row in spiders: spider_id = row[0] with session_scope() as session: over_limitation_jobs = list(session.query(HistoricalJob)\ .filter(HistoricalJob.spider_id==spider_id)\ .order_by(desc(HistoricalJob.complete_time))\ .slice(job_history_limit_each_spider, 1000)\ .all()) for over_limitation_job in over_limitation_jobs: self._remove_histical_job(over_limitation_job) def _clear_running_jobs(self): with session_scope() as session: jobs = list( session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.status.in_([0, 1]))) for job in jobs: self._remove_histical_job(job) def reset_timeout_job(self): with session_scope() as session: timeout_time = datetime.datetime.now() - datetime.timedelta( minutes=1) for job in session.query(SpiderExecutionQueue).filter( SpiderExecutionQueue.status == 1): spider = session.query(Spider).filter_by( id=job.spider_id).first() job_timeout_setting = session.query(SpiderSettings).filter_by( spider_id=spider.id, setting_key='timeout').first() job_timeout = int( job_timeout_setting.value) if job_timeout_setting else 3600 logger.debug((job.update_time - job.start_time).seconds) if job.update_time < timeout_time: # job is not refresh as expected, node might be died, reset the status to PENDING job.status = 0 job.pid = None job.node_id = None job.update_time = datetime.datetime.now() session.add(job) logger.info('Job %s is timeout, reseting.' % job.id) elif (job.update_time - job.start_time).seconds > job_timeout: # job is running too long, should be killed historical_job = HistoricalJob() historical_job.id = job.id historical_job.spider_id = job.spider_id historical_job.project_name = job.project_name historical_job.spider_name = job.spider_name historical_job.fire_time = job.fire_time historical_job.start_time = job.start_time historical_job.complete_time = job.update_time historical_job.status = 3 session.delete(job) session.add(historical_job) logger.info('Job %s is timeout, killed.' 
% job.id) session.commit() def _remove_histical_job(self, job): ''' @type job: HistoricalJob ''' with session_scope() as session: job = session.query(HistoricalJob).filter( HistoricalJob.id == job.id).first() if job.items_file: try: os.remove(job.items_file) except Exception as e: logger.warning(e.message) if job.log_file: try: os.remove(job.log_file) except Exception as e: logger.warning(e.message) original_log_file = os.path.join('logs', job.project_name, job.spider_name, '%s.log' % job.id) if os.path.exists(original_log_file): os.remove(original_log_file) original_items_file = os.path.join('items', job.project_name, job.spider_name, '%s.jl' % job.id) if os.path.exists(original_items_file): os.remove(original_items_file) session.delete(job) def remove_schedule(self, project_name, spider_name, trigger_id): with session_scope() as session: project = session.query(Project).filter( Project.name == project_name).first() spider = session.query(Spider).filter( Spider.project_id == project.id, Spider.name == spider_name).first() trigger = session.query(Trigger).filter( Trigger.spider_id == spider.id, Trigger.id == trigger_id).first() session.delete(trigger) if self.scheduler.get_job(str(trigger_id)): self.scheduler.remove_job(str(trigger.id)) if self.sync_obj: logger.info('remove_schedule') self.sync_obj.remove_schedule_job(trigger.id)
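# SchedulerManager.add_job converts a classic 5-field cron pattern into an
# APScheduler CronTrigger and registers it under the trigger id. A standalone
# sketch; the fire() callback, trigger id 42, and the "*/10 * * * *" pattern
# are illustrative assumptions.
from apscheduler.schedulers.tornado import TornadoScheduler
from apscheduler.triggers.cron import CronTrigger


def fire(trigger_id):
    print("trigger %s fired" % trigger_id)


# field order: minute hour day month day_of_week -> every 10 minutes
parts = '*/10 * * * *'.split(' ')
cron_trigger = CronTrigger(minute=parts[0], hour=parts[1], day=parts[2],
                           month=parts[3], day_of_week=parts[4])

scheduler = TornadoScheduler()
scheduler.start()

# replace_existing=True lets the same trigger id be re-registered after a restart
scheduler.add_job(func=fire, trigger=cron_trigger, kwargs={'trigger_id': 42},
                  id='42', replace_existing=True)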
class SchedulerManager():
    def __init__(self, config=None, syncobj=None):
        if config is None:
            config = Config()
        self.config = config
        executors = {
            'default': ThreadPoolExecutor(20),
        }
        self.project_storage_dir = config.get('project_storage_dir')
        self.scheduler = TornadoScheduler(executors=executors)
        self.poll_task_queue_callback = None
        self.pool_task_queue_interval = 10
        self.ioloop = IOLoop.instance()
        self.clear_finished_jobs_callback = PeriodicCallback(
            self.clear_finished_jobs, 60 * 1000)
        self.reset_timeout_job_callback = PeriodicCallback(
            self.reset_timeout_job, 10 * 1000)
        self.sync_obj = syncobj
        if syncobj is not None:
            self.sync_obj.set_on_remove_schedule_job(
                self.on_cluster_remove_scheduling_job)
            self.sync_obj.set_on_add_schedule_job(
                self.on_cluster_add_scheduling_job)

    def init(self):
        session = Session()
        # move completed jobs into history
        for job in session.query(SpiderExecutionQueue)\
                .filter(SpiderExecutionQueue.status.in_((2, 3))):
            historical_job = HistoricalJob()
            historical_job.id = job.id
            historical_job.spider_id = job.spider_id
            historical_job.project_name = job.project_name
            historical_job.spider_name = job.spider_name
            historical_job.fire_time = job.fire_time
            historical_job.start_time = job.start_time
            historical_job.complete_time = job.update_time
            historical_job.status = job.status
            session.delete(job)
            session.add(historical_job)
        session.commit()

        # init triggers
        triggers = session.query(Trigger)
        for trigger in triggers:
            try:
                self.add_job(trigger.id, trigger.cron_pattern)
            except InvalidCronExpression:
                LOGGER.warning('Trigger %d,%s cannot be added',
                               trigger.id, trigger.cron_pattern)
        session.close()

        self.scheduler.start()
        self.clear_finished_jobs_callback.start()
        self.reset_timeout_job_callback.start()

    def build_cron_trigger(self, cron):
        cron_parts = cron.split(' ')
        if len(cron_parts) != 5:
            raise InvalidCronExpression()

        try:
            crontrigger = CronTrigger(minute=cron_parts[0],
                                      hour=cron_parts[1],
                                      day=cron_parts[2],
                                      month=cron_parts[3],
                                      day_of_week=cron_parts[4],
                                      )
            return crontrigger
        except ValueError:
            raise InvalidCronExpression()

    def add_job(self, trigger_id, cron):
        LOGGER.debug('adding trigger %s %s' % (trigger_id, cron))
        crontrigger = self.build_cron_trigger(cron)
        job = self.scheduler.add_job(func=self.trigger_fired,
                                     trigger=crontrigger,
                                     kwargs={'trigger_id': trigger_id},
                                     id=str(trigger_id),
                                     replace_existing=True)
        if self.sync_obj:
            self.ioloop.call_later(0, self.sync_obj.add_schedule_job,
                                   trigger_id)

    def on_cluster_remove_scheduling_job(self, job_id):
        LOGGER.debug('on_cluster_remove_scheduling_job')
        if self.scheduler.get_job(job_id):
            self.scheduler.remove_job(job_id)

    def on_cluster_add_scheduling_job(self, trigger_id):
        LOGGER.debug('on_cluster_add_scheduling_job')
        with session_scope() as session:
            trigger = session.query(Trigger).get(trigger_id)
            if trigger is None:
                return
            crontrigger = self.build_cron_trigger(trigger.cron_pattern)
            job = self.scheduler.add_job(func=self.trigger_fired,
                                         trigger=crontrigger,
                                         kwargs={'trigger_id': trigger_id},
                                         id=str(trigger_id),
                                         replace_existing=True)

    def trigger_fired(self, trigger_id):
        with session_scope() as session:
            trigger = session.query(Trigger).filter_by(id=trigger_id).first()
            if not trigger:
                LOGGER.error('Trigger %s not found.' % trigger_id)
                return
            spider = session.query(Spider).filter_by(id=trigger.spider_id).first()
            if not spider:
                LOGGER.error('Spider %s not found' % trigger.spider_id)
                return
            project = session.query(Project).filter_by(id=spider.project_id).first()
            if not project:
                LOGGER.error('Project %s not found' % spider.project_id)
                return
            try:
                self.add_task(project.name, spider.name)
            except JobRunning:
                LOGGER.info('Job for spider %s.%s already reached the '
                            'concurrency limit' % (project.name, spider.name))

    def add_schedule(self, project, spider, cron):
        with session_scope() as session:
            triggers = session.query(Trigger)\
                .filter(Trigger.spider_id == spider.id)
            found = False
            for trigger in triggers:
                if trigger.cron_pattern == cron:
                    found = True
                    break

            if not found:
                # build a cron trigger here just to validate the expression
                cron_trigger = self.build_cron_trigger(cron)

                trigger = Trigger()
                trigger.spider_id = spider.id
                trigger.cron_pattern = cron
                session.add(trigger)
                session.commit()
                self.add_job(trigger.id, cron)

    def add_task(self, project_name, spider_name, settings=None):
        with session_scope() as session:
            project = session.query(Project)\
                .filter(Project.name == project_name).first()
            spider = session.query(Spider)\
                .filter(Spider.name == spider_name,
                        Spider.project_id == project.id).first()
            executing = SpiderExecutionQueue()
            spider_tag_vo = session.query(SpiderSettings)\
                .filter_by(spider_id=spider.id,
                           setting_key='tag').first()
            spider_tag = spider_tag_vo.value if spider_tag_vo else None
            jobid = generate_job_id()
            executing.id = jobid
            executing.spider_id = spider.id
            executing.project_name = project.name
            executing.spider_name = spider.name
            executing.fire_time = datetime.datetime.now()
            executing.update_time = datetime.datetime.now()
            executing.tag = spider_tag
            if settings:
                executing.settings = json.dumps(settings)
            session.add(executing)
            session.commit()
            session.refresh(executing)
            return executing

    def cancel_task(self, job_id):
        with session_scope() as session:
            job = session.query(SpiderExecutionQueue).get(job_id)
            if not job:
                raise JobNotFound()

            if job.status not in (JOB_STATUS_PENDING, JOB_STATUS_RUNNING):
                raise InvalidJobStatus('Invalid status.')

            job.status = JOB_STATUS_CANCEL
            job.update_time = datetime.datetime.now()

            historical_job = HistoricalJob()
            historical_job.id = job.id
            historical_job.spider_id = job.spider_id
            historical_job.project_name = job.project_name
            historical_job.spider_name = job.spider_name
            historical_job.fire_time = job.fire_time
            historical_job.start_time = job.start_time
            historical_job.complete_time = job.update_time
            historical_job.status = job.status
            session.delete(job)
            session.add(historical_job)
            session.commit()
            session.refresh(historical_job)

    def on_node_expired(self, node_id):
        session = Session()
        for job in session.query(SpiderExecutionQueue)\
                .filter(SpiderExecutionQueue.node_id == node_id,
                        SpiderExecutionQueue.status == 1):
            job.status = 0
            job.update_time = datetime.datetime.now()
            job.start_time = None
            job.pid = None
            job.node_id = None
            session.add(job)
        session.commit()
        session.close()

    def jobs(self):
        session = Session()
        pending = list(session.query(SpiderExecutionQueue)
                       .filter_by(status=JOB_STATUS_PENDING))
        running = list(session.query(SpiderExecutionQueue)
                       .filter_by(status=JOB_STATUS_RUNNING))
        finished = list(session.query(HistoricalJob)
                        .order_by(desc(HistoricalJob.complete_time))
                        .slice(0, 100))
        session.close()
        return pending, running, finished

    def job_start(self, jobid, pid):
        with session_scope() as session:
            job = session.query(SpiderExecutionQueue).filter_by(id=jobid).first()
            if job.start_time is None:
                job.start_time = datetime.datetime.now()
            job.update_time = datetime.datetime.now()
            if job.pid is None and pid:
                job.pid = pid
            session.add(job)
            session.commit()
            session.close()

    def _regular_agent_tags(self, agent_tags):
        if agent_tags is None:
            return None
        if isinstance(agent_tags, string_types):
            return agent_tags.split(',')
        return agent_tags

    def get_next_task(self, node_id):
        """
        Get the next task for node_id; if one exists, update the job status
        and track it with node_id.
        :param node_id: node_id
        :return: the running job
        """
        with session_scope() as session:
            node = session.query(Node).filter(Node.id == node_id).first()
            if not node:
                raise NodeNotFound()
            node_tags = node.tags
            next_task = self._get_next_task(session, node_tags)
            if not next_task:
                return None

            now = self._now()
            next_task.start_time = now
            next_task.update_time = now
            next_task.node_id = node_id
            next_task.status = JOB_STATUS_RUNNING
            session.add(next_task)
            session.commit()
            session.refresh(next_task)
            return next_task

    def _get_next_task(self, session, agent_tags):
        # pick the oldest pending job per spider, then filter by the
        # per-spider concurrency limit and tag matching
        result = session.execute("""
            select * from spider_execution_queue
            join (select min(fire_time) as fire_time, spider_id
                  from spider_execution_queue
                  where status=0
                  group by spider_id) as a
              on spider_execution_queue.fire_time = a.fire_time
             and spider_execution_queue.spider_id = a.spider_id
            order by fire_time
        """)
        for job in session.query(SpiderExecutionQueue).instances(result):
            spider_max_concurrency = 1
            spider_concurrency = session.query(
                func.count(SpiderExecutionQueue.id))\
                .filter(SpiderExecutionQueue.status == JOB_STATUS_RUNNING)\
                .scalar() or 0
            if spider_concurrency >= spider_max_concurrency:
                continue

            spider_tags = self.get_spider_tags(job.spider, session)
            if self._match_tags(spider_tags, agent_tags):
                return job
        return None

    def _match_tags(self, spider_tags, node_tags):
        # both empty
        if not spider_tags and not node_tags:
            return True

        # one empty and one not
        if not spider_tags or not node_tags:
            return False

        for spider_tag in spider_tags:
            if spider_tag not in node_tags:
                return False
        return True

    def get_spider_tags(self, spider, session):
        tags_setting = session.query(SpiderSettings) \
            .filter(SpiderSettings.setting_key == 'tag',
                    SpiderSettings.spider_id == spider.id).first()
        if not tags_setting:
            return []
        if not tags_setting.value:
            return []

        return [x for x in tags_setting.value.split(',') if x]

    def has_task(self, node_id):
        with session_scope() as session:
            node = session.query(Node).filter(Node.id == node_id).first()
            if node is None:
                raise NodeNotFound()
            node_tags = self._regular_agent_tags(node.tags)
            next_task = self._get_next_task(session, node_tags)
            return next_task is not None

    def jobs_running(self, node_id, job_ids):
        '''
        Update the running jobs reported by a node.
        If any job's status is wrong, tell the node to kill it.
        :param node_id:
        :param job_ids:
        :return: job ids to kill
        '''
        jobs_to_kill = []
        with session_scope() as session:
            for job_id in job_ids:
                job = session.query(SpiderExecutionQueue).filter(
                    SpiderExecutionQueue.id == job_id).first()
                if job:
                    if job.node_id is None:
                        job.node_id = node_id
                    if job.node_id != node_id or \
                            job.status != 1:
                        jobs_to_kill.append(job.id)
                    else:
                        job.update_time = self._now()
                        session.add(job)
                else:
                    jobs_to_kill.append(job_id)
            session.commit()
        return jobs_to_kill

    def job_finished(self, job, log_file=None, items_file=None):
        session = Session()
        if job.status not in (JOB_STATUS_SUCCESS, JOB_STATUS_FAIL):
            raise Exception('Invalid status.')
        job_status = job.status
        job = session.query(SpiderExecutionQueue).filter_by(id=job.id).first()
        job.status = job_status
        job.update_time = datetime.datetime.now()
        project_storage = ProjectStorage(self.project_storage_dir,
                                         job.spider.project)

        historical_job = HistoricalJob()
        historical_job.id = job.id
        historical_job.spider_id = job.spider_id
        historical_job.project_name = job.project_name
        historical_job.spider_name = job.spider_name
        historical_job.fire_time = job.fire_time
        historical_job.start_time = job.start_time
        historical_job.complete_time = job.update_time
        historical_job.status = job.status
        if log_file:
            # parse crawl statistics out of the scrapy log
            import re
            items_crawled_pattern = re.compile(r"\'item_scraped_count\': (\d+),")
            error_log_pattern = re.compile(r"\'log_count/ERROR\': (\d+),")
            warning_log_pattern = re.compile(r"\'log_count/WARNING\': (\d+),")
            log_file.seek(0)
            log_raw = log_file.read()
            log_encoding = chardet.detect(log_raw)['encoding']
            log_content = ensure_str(log_raw, log_encoding)
            m = items_crawled_pattern.search(log_content)
            if m:
                historical_job.items_count = int(m.group(1))
            m = error_log_pattern.search(log_content)
            if m and historical_job.status == JOB_STATUS_SUCCESS:
                historical_job.status = JOB_STATUS_FAIL
            m = warning_log_pattern.search(log_content)
            if m and historical_job.status == JOB_STATUS_SUCCESS:
                historical_job.status = JOB_STATUS_WARNING
            log_file.seek(0)
        if items_file:
            items_file.seek(0)
        project_storage.put_job_data(job, log_file, items_file)
        session.delete(job)
        session.add(historical_job)
        session.commit()
        session.refresh(historical_job)

        # send mail
        if historical_job.status == JOB_STATUS_FAIL:
            self.try_send_job_failed_mail(historical_job)
        session.close()
        return historical_job

    def _now(self):
        return datetime.datetime.now()

    def try_send_job_failed_mail(self, job):
        LOGGER.debug('try_send_job_failed_mail')
        job_fail_send_mail = self.config.getboolean('job_fail_send_mail')
        if job_fail_send_mail:
            try:
                mail_sender = MailSender(self.config)
                subject = 'scrapydd job failed'
                to_address = self.config.get('job_fail_mail_receiver')
                content = 'bot:%s \r\nspider:%s \r\n job_id:%s \r\n' % (
                    job.spider.project.name, job.spider_name, job.id)
                mail_sender.send(to_addresses=to_address, subject=subject,
                                 content=content)
            except Exception as e:
                LOGGER.error('Error when sending job_fail mail %s' % e)

    def clear_finished_jobs(self):
        job_history_limit_each_spider = 100
        with session_scope() as session:
            spiders = list(session.query(distinct(HistoricalJob.spider_id)))
        for row in spiders:
            spider_id = row[0]
            with session_scope() as session:
                over_limitation_jobs = session.query(HistoricalJob)\
                    .filter_by(spider_id=spider_id)\
                    .order_by(desc(HistoricalJob.complete_time))\
                    .slice(job_history_limit_each_spider, 1000)\
                    .all()
                for over_limitation_job in over_limitation_jobs:
                    self._remove_histical_job(over_limitation_job)

    def reset_timeout_job(self):
        KILL_TIMEOUT = 120
        now = self._now()
        with session_scope() as session:
            for job in session.query(SpiderExecutionQueue)\
                    .filter(SpiderExecutionQueue.status == JOB_STATUS_RUNNING):
                # check whether the job has exceeded its run timeout;
                # the next status is STOPPING, then ERROR
                spider = session.query(Spider).get(job.spider_id)
                job_timeout_setting = session.query(SpiderSettings)\
                    .filter_by(spider_id=spider.id,
                               setting_key='timeout').first()
                job_timeout = int(job_timeout_setting.value) \
                    if job_timeout_setting else 3600
                if now > job.start_time + \
                        datetime.timedelta(seconds=job_timeout):
                    job.status = JOB_STATUS_STOPPING
                    job.update_time = self._now()
                    session.add(job)
                    LOGGER.info('Job %s is running timeout, stopping.', job.id)
                    session.commit()
                    continue

                # the job has not been updated within the update timeout;
                # the node may have failed, so reset the job back to PENDING
                if now > job.update_time + datetime.timedelta(minutes=1):
                    job.status = JOB_STATUS_PENDING
                    job.pid = None
                    job.node_id = None
                    job.update_time = self._now()
                    session.add(job)
                    session.commit()
                    LOGGER.info('Job %s is update timeout, reset.', job.id)
                    continue

            for job in session.query(SpiderExecutionQueue)\
                    .filter(SpiderExecutionQueue.status.in_([JOB_STATUS_STOPPING])):
                if (datetime.datetime.now() - job.start_time).total_seconds() > KILL_TIMEOUT:
                    # the job has been stopping for too long, force-kill it
                    historical_job = HistoricalJob()
                    historical_job.id = job.id
                    historical_job.spider_id = job.spider_id
                    historical_job.project_name = job.project_name
                    historical_job.spider_name = job.spider_name
                    historical_job.fire_time = job.fire_time
                    historical_job.start_time = job.start_time
                    historical_job.complete_time = job.update_time
                    historical_job.status = 3
                    session.delete(job)
                    session.add(historical_job)
                    LOGGER.info('Job %s is timeout, killed.' % job.id)
            session.commit()

    def _remove_histical_job(self, job):
        '''
        @type job: HistoricalJob
        '''
        with session_scope() as session:
            job = session.query(HistoricalJob).filter(
                HistoricalJob.id == job.id).first()
            spider = job.spider
            project = spider.project
            project_storage_dir = self.config.get('project_storage_dir')
            project_storage = ProjectStorage(project_storage_dir, project)
            project_storage.delete_job_data(job)
            session.delete(job)
            session.commit()

    def remove_schedule(self, project_name, spider_name, trigger_id):
        with session_scope() as session:
            project = session.query(Project)\
                .filter(Project.name == project_name).first()
            spider = session.query(Spider)\
                .filter(Spider.project_id == project.id,
                        Spider.name == spider_name).first()
            trigger = session.query(Trigger)\
                .filter_by(spider_id=spider.id, id=trigger_id).first()
            session.delete(trigger)
            if self.scheduler.get_job(str(trigger_id)):
                self.scheduler.remove_job(str(trigger.id))
            if self.sync_obj:
                LOGGER.info('remove_schedule')
                self.sync_obj.remove_schedule_job(trigger.id)
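As a usage note, here is a rough sketch of how a SchedulerManager like the one above could be wired into a Tornado process. Config(), the project and spider names, and the absence of a cluster sync object are placeholders for illustration; a real deployment would also initialise the database session machinery first.

from tornado.ioloop import IOLoop


def run_scheduler():
    # assumes the session/ORM setup used by SchedulerManager has already
    # been initialised; all names below are illustrative only
    manager = SchedulerManager(config=Config(), syncobj=None)

    # moves finished jobs into history, re-registers cron triggers and starts
    # the TornadoScheduler plus the periodic clean-up/timeout callbacks
    manager.init()

    # queue an immediate run; recurring runs go through add_schedule() with a
    # Spider record and a 5-field cron pattern such as '0 3 * * *'
    manager.add_task('some_project', 'some_spider')

    IOLoop.current().start()


if __name__ == '__main__':
    run_scheduler()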