Example #1
    def __init__(self, start=True):
        try:
            config = {
                'apscheduler.daemon': True,
                'apscheduler.standalone': False,
                'apscheduler.threadpool.core_threads': self.__threadpool_corethreads,
                'apscheduler.threadpool.max_threads': self.__threadpool_maxthreads,
                'apscheduler.threadpool.keepalive': self.__threadpool_keepalive,
                'apscheduler.coalesce': self.__COALESCE
            }
            self.__sched = Scheduler(config)
            # Add the SQLAlchemy job store as the default. Surprisingly, this
            # was far less tedious than getting the shelve job store working.
            self.__sched.add_jobstore(
                SQLAlchemyJobStore(url=self.__mysql_url, tablename='SCHEDULE'),
                'default')
            # Stop the scheduler when the program exits.
            atexit.register(lambda: self.__sched.shutdown(wait=False))
            if start:
                self.__sched.start()
        except KeyError:
            logging.warning('An error occurred starting the scheduler.')
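A minimal usage sketch to complement the example above (an assumption, not part of the original source; the heartbeat function and the sqlite URL are placeholders). In APScheduler 2.x the jobstore keyword of add_interval_job routes the job into the named store, so jobs added this way survive restarts:

from apscheduler.scheduler import Scheduler
from apscheduler.jobstores.sqlalchemy_store import SQLAlchemyJobStore


# must live at module level so the job store can pickle a reference to it
def heartbeat():
    print 'still alive'

sched = Scheduler()
sched.add_jobstore(SQLAlchemyJobStore(url='sqlite:///jobs.db'), 'default')
sched.start()
# note: re-adding the same job on every process start duplicates it in a
# persistent store, so guard this in real code
sched.add_interval_job(heartbeat, seconds=30, jobstore='default')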
Example #2
    def setup_class(cls):
        if not SQLAlchemyJobStore:
            raise SkipTest

        from sqlalchemy import create_engine

        engine = create_engine('sqlite:///')
        cls.jobstore = SQLAlchemyJobStore(engine=engine)
Example #3
def main():
    sched = Scheduler()
    # mysql_engine = create_engine('mysql://root:@localhost:3306/fengine?charset=utf8', encoding='utf-8', echo=True)
    mysql_engine = get_db_engine()
    sched.daemonic = False
    print "Starting index engine......"
    job_store = SQLAlchemyJobStore(engine=mysql_engine)
    sched.add_jobstore(job_store, 'default')

    list_spider_job(sched)  # add the Spider tasks to the job queue
    scan_engine_job(sched)  # add the main index service to the job queue

    # sched.add_cron_job(scan_loan_items_job, hour='*', minute='*', second='5')
    # Add the index job to the scheduler, firing every five minutes:
    # engine_name = 'engine.py'
    # python_loc = os.path.join(os.getcwd(), engine_name)
    # sched.add_interval_job(python_job_func, seconds=5, name=engine_name, args=[python_loc])
    # list_spider_job(sched)
    sched.start()
Example #4
    def configure(self, gconfig={}, **options):
        if self.running:
            raise SchedulerAlreadyRunningError

        config = combine_opts(gconfig, 'main.', options)
        self._config = config

        self.misfire_grace_time = int(config.pop('misfire_grace_time', 1))
        self.coalesce = asbool(config.pop('coalesce', True))
        self.daemonic = asbool(config.pop('daemonic', True))
        self.standalone = asbool(config.pop('standalone', False))

        timezone = config.pop('timezone', None)
        self.timezone = gettz(timezone) if isinstance(timezone, basestring) else timezone or tzlocal()

        # config threadpool
        threadpool_opts = combine_opts(config, 'threadpool.')
        self._worker_threadpool = ThreadPool(**threadpool_opts)

        # config jobstore
        jobstore_opts = combine_opts(config, 'jobstore.')
        self._job_store = SQLAlchemyJobStore(**jobstore_opts)

        # config syncqueue
        syncqueue_opts = combine_opts(config, 'syncqueue.')
        self._changes_queue = HotQueue(**syncqueue_opts)

        # config statstore
        statstore_opts = combine_opts(config, 'statstore.')
        self._stat_store = JobReporter(**statstore_opts)

        # config statqueue
        statqueue_opts = combine_opts(config, 'statqueue.')
        self._stats_queue = HotQueue(**statqueue_opts)

        # configure logger
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
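For reference, a hypothetical gconfig dict that the configure() above could consume (assumptions: combine_opts strips the given key prefix, as APScheduler's helper of the same name does, and the option names for the custom JobReporter stat store are unknown, so they are omitted):

gconfig = {
    'main.standalone': 'false',              # parsed by asbool()
    'main.timezone': 'Asia/Chongqing',       # resolved via gettz()
    'main.threadpool.core_threads': 5,       # becomes ThreadPool(core_threads=5)
    'main.jobstore.url': 'sqlite:////tmp/task.db',
    'main.jobstore.tablename': 'tasks',      # becomes SQLAlchemyJobStore(url=..., tablename=...)
    'main.syncqueue.name': 'job_changes',    # becomes HotQueue('job_changes')
    'main.statqueue.name': 'job_stats',      # hypothetical stat queue name
}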
Example #5
    def __init__(self, db_path='sqlite:///scheduler.db'):
        self.scheduler = Scheduler()
        self.scheduler.add_jobstore(SQLAlchemyJobStore(url=db_path), 'default')
Example #6
def test_sqlalchemy_alternate_tablename():
    if not SQLAlchemyJobStore:
        raise SkipTest

    store = SQLAlchemyJobStore('sqlite:///', tablename='test_table')
    eq_(store.jobs_t.name, 'test_table')
Example #7
def test_sqlalchemy_invalid_args():
    if not SQLAlchemyJobStore:
        raise SkipTest

    # the store requires either a url or an engine; with neither it
    # should raise ValueError
    assert_raises(ValueError, SQLAlchemyJobStore)
Example #8
    def setup_class(cls):
        if not SQLAlchemyJobStore:
            raise SkipTest

        cls.jobstore = SQLAlchemyJobStore(url='sqlite:///')
Example #9
from hotqueue import HotQueue
from dateutil.tz import gettz

from apscheduler.job import Job
from apscheduler.jobstores.sqlalchemy_store import SQLAlchemyJobStore
from apscheduler.triggers import IntervalTrigger
from apscheduler.scripts import HttpScript

if __name__ == '__main__':

    script = HttpScript(url='http://baidu.com')
    local_tz = gettz('Asia/Chongqing')
    defaults = {'timezone': local_tz}
    trigger = IntervalTrigger(defaults, seconds=3)

    store = SQLAlchemyJobStore(url='sqlite:////tmp/task.db', tablename='tasks')
    job = store.get_job(3)
    if not job:
        job = Job(id=3, name='BaiduCheck', script=script, trigger=trigger)
        store.add_job(job)

    print job

    job.trigger = IntervalTrigger(defaults, seconds=5)
    store.update_job(job)

    queue = HotQueue('job_changes')
    queue.put({'job_id': job.id, 'opt_type': 'update'})


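The snippet above is the producer side of the job-change notification scheme. For illustration, a minimal consumer sketch (an assumption, not from the source; Example #10 below implements the real consumer in _sync_changes()):

from hotqueue import HotQueue

queue = HotQueue('job_changes')
# consume() blocks on the underlying redis list and yields each message
for msg in queue.consume():
    print 'job %s changed: %s' % (msg['job_id'], msg['opt_type'])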
Example #10
class LocalScheduler(object):

    _stopped = False
    _main_thread = None

    # init worker threadpool, reporter thread, and updater thread
    def __init__(self, gconfig={}, **options):
        self._wakeup = Event()
        self._job_store = None
        self._stat_store = None
        self._jobs = {}
        self.logger = None
        self._stats_queue = None
        self._changes_queue = None

        self._jobs_locks = {}
        self._jobs_lock = Lock()
        self._log_queue_lock = Lock()

        self._worker_threadpool = None
        self._reporter_thread = None
        self._main_thread = None
        self._updater_thread = None
        self._monitor_thread = None

        self.configure(gconfig, **options)

    def configure(self, gconfig={}, **options):
        if self.running:
            raise SchedulerAlreadyRunningError

        config = combine_opts(gconfig, 'main.', options)
        self._config = config

        self.misfire_grace_time = int(config.pop('misfire_grace_time', 1))
        self.coalesce = asbool(config.pop('coalesce', True))
        self.daemonic = asbool(config.pop('daemonic', True))
        self.standalone = asbool(config.pop('standalone', False))

        timezone = config.pop('timezone', None)
        self.timezone = gettz(timezone) if isinstance(timezone, basestring) else timezone or tzlocal()

        # config threadpool
        threadpool_opts = combine_opts(config, 'threadpool.')
        self._worker_threadpool = ThreadPool(**threadpool_opts)

        # config jobstore
        jobstore_opts = combine_opts(config, 'jobstore.')
        self._job_store = SQLAlchemyJobStore(**jobstore_opts)

        # config syncqueue
        syncqueue_opts = combine_opts(config, 'syncqueue.')
        self._changes_queue = HotQueue(**syncqueue_opts)

        # config statstore
        statstore_opts = combine_opts(config, 'statstore.')
        self._stat_store = JobReporter(**statstore_opts)

        # config statqueue
        statqueue_opts = combine_opts(config, 'statqueue.')
        self._stats_queue = HotQueue(**statqueue_opts)

        # configure logger
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)



    def start(self):
        if self.running:
            raise SchedulerAlreadyRunningError

        self.load_jobs()

        self._stopped = False

        if self.standalone:
            self._main_loop()
        else:
            self._main_thread = Thread(target=self._main_loop, name='main')
            self._main_thread.setDaemon(self.daemonic)
            self._main_thread.start()
            print 'main thread is started'

            self._updater_thread = Thread(target=self._sync_changes, name='update')
            self._updater_thread.setDaemon(self.daemonic)
            self._updater_thread.start()
            print 'update thread is started'

            self._stater_thread = Thread(target=self._stat_runs, name='stat')
            self._stater_thread.setDaemon(self.daemonic)
            self._stater_thread.start()
            print 'stat thread is started'

    def shutdown(self, shutdown_threadpool=True, close_jobstore=True):
        if not self.running:
            return 
        self._stopped = True
        self._wakeup.set()

        if shutdown_threadpool:
            self._worker_threadpool.shutdown()

        if self._main_thread:
            self._main_thread.join()

        if close_jobstore:
            self._job_store.close()

    @property
    def running(self):
        return not self._stopped and self._main_thread and self._main_thread.isAlive()
    

    def now(self):
        return datetime.now(self.timezone)

    def set_jobs(self, jobs):
        now = self.now()
        with self._jobs_lock:
            for job in jobs:
                job.compute_next_run_time(now)
                self._jobs[job.id] = job
                self._jobs_locks[job.id] = Lock()

    # load the jobs pool from the job store
    def load_jobs(self):
        jobs = self._job_store.load_jobs()
        with self._jobs_lock:
            for job in jobs:
                self._add_job(job)

    def _add_job(self, job):
        try:
            now = self.now()
            job.compute_next_run_time(now)
            if job.next_run_time:
                self._jobs[job.id] = job
                self._jobs_locks[job.id] = Lock()
        except Exception:
            self.logger.exception("add job(id=%d, name=%s) failed" % (job.id, job.name))
            return False

        return True

    def _remove_job(self, job_id):
        try:
            with self._jobs_locks[job_id]:
                del self._jobs[job_id]
            del self._jobs_locks[job_id]
        except Exception:
            self.logger.exception("remove job(id=%d) failed" % job_id)
            return False

        return True

    def _main_loop(self):
        print "entering the main loop"
        self._wakeup.clear()
        while not self._stopped:
            print 'check again'
            now = self.now()
            next_wakeup_time = self._process_jobs(now)
            print "next_wakeup_time:", next_wakeup_time
            if next_wakeup_time is not None:
                wait_seconds = time_difference(next_wakeup_time, now)
                self._wakeup.wait(wait_seconds)
                self._wakeup.clear()
            else:
                self._wakeup.wait()
                self._wakeup.clear()
        print "leaving the main loop"

    def _process_jobs(self, now):
        next_wakeup_time = None
        print self._jobs

        for job in self._jobs.values():
            run_time_list = job.get_run_times(now)

            if run_time_list:
                self._worker_threadpool.submit(self._run_job, job, run_time_list)

                with self._jobs_locks[job.id]:
                    next_run_time = job.compute_next_run_time(now + timedelta(microseconds=1))

                if not next_run_time:
                    self._remove_job(job.id)

            print 'job.next_run_time:', job.id,  job.next_run_time
            if not next_wakeup_time:
                next_wakeup_time = job.next_run_time
            elif job.next_run_time:
                next_wakeup_time = min(next_wakeup_time, job.next_run_time)

        return next_wakeup_time


    def _run_job(self, job, run_time_list):
        for run_time in run_time_list:
            now = self.now()
            difference = now - run_time
            grace_time = timedelta(seconds=self.misfire_grace_time)
            if difference > grace_time:
                self.logger.warning('Run time of job "%s" was missed by %s', job, difference)
                self._put_stat(job.id, 'missed', next_run_time=job.next_run_time)
            else:
                try:
                    # a timeout could be added by joining a worker thread, e.g.
                    # t = Thread(target=job.run); t.start(); t.join(timeout)
                    # refer: http://augustwu.iteye.com/blog/554827
                    self._put_stat(job.id, 'running', next_run_time=job.next_run_time)
                    result = job.run()
                    print 'job ran successfully'
                    cost = self.now() - now
                    self._put_stat(job.id, 'succed', cost=cost)
                except Exception:
                    self.logger.exception('Job "%s" raised an exception', job)
                    cost = self.now() - now
                    self._put_stat(job.id, 'failed', cost=cost)

            # with coalescing enabled, run only once per batch of run times
            if self.coalesce:
                break


    def _put_stat(self, job_id, status, next_run_time=None, cost=timedelta(seconds=0)):
        msg = {
            'time': pickle.dumps(self.now()),
            'job_id': job_id,
            'status': status,
            'next_run_time': pickle.dumps(next_run_time),
            # total_seconds() already includes the microsecond fraction
            'cost': cost.total_seconds()
        }
        try:
            self._stats_queue.put(msg)
        except Exception:
            self.logger.exception('failed to put stat item %r' % msg)

    def _stat_runs(self):
        while not self._stopped:
            try:
                msg = self._stats_queue.get(block=True, timeout=1)
            except Exception:
                self.logger.exception('get stat item failed')
                msg = None

            if not msg:
                continue

            try:
                msg["time"] = pickle.loads(msg['time'])
                msg["next_run_time"] = pickle.loads(msg['next_run_time'])
                self._stat_store.report(**msg)
            except Exception:
                traceback.print_exc()
                self.logger.exception('report job status failed %r' % msg)

    def _sync_changes(self):
        count = 0
        max_items_once = int(self._config.pop('max_items_once', 0))
        while not self._stopped:
            try:
                msg = self._changes_queue.get(block=True, timeout=1)
            except Exception:
                self.logger.exception('get sync item failed')
                msg = None

            if msg:
                opt_type = msg['opt_type']
                job_id = msg['job_id']
                if job_id > 0 and isinstance(opt_type, basestring):
                    try:
                        self._apply_change(opt_type, job_id)
                        self.logger.info('apply change "%s" for job(%d)', opt_type, job_id)
                    except Exception:
                        self.logger.exception('apply change "%s" for job(%d) failed', opt_type, job_id)
                    count += 1

            if not msg or (max_items_once > 0 and count > max_items_once):
                if count > 0:
                    self.logger.info('wakeup main thread by sync thread with %d updates' % count)
                    self._wakeup.set()
                    count = 0


    def _apply_change(self, opt_type, job_id):
        if opt_type == 'add' or opt_type == 'update':
            job = None
            try:
                job = self._job_store.get_job(job_id)
            except Exception as e:
                self.logger.exception(e)

            if job:
                if opt_type == 'add':
                    if job_id not in self._jobs:
                        self._add_job(job)
                    else:
                        self.logger.error("apply change '%s job(id=%d, name=%s)' failed" % (opt_type, job.id, job.name))
                else:
                    #!todo check whether computing next_run_time again is necessary
                    now = self.now()
                    job.compute_next_run_time(now)
                    with self._jobs_locks[job_id]:
                        self._jobs[job_id] = job

        elif opt_type == 'delete' or opt_type == 'pause':
            self._remove_job(job_id)
        else:
            self.logger.error('opt %s job(%d) to jobs pool is not supported' % (opt_type, job_id))
Example #11
from apscheduler.scheduler import Scheduler
from apscheduler.jobstores.sqlalchemy_store import SQLAlchemyJobStore
import datetime
from time import sleep

JOBS_DATABASE = "postgresql://*****:*****@localhost/test_jobs"

# Start the scheduler
sched = Scheduler()
sched.add_jobstore(
    SQLAlchemyJobStore(url=JOBS_DATABASE, tablename='apscheduler_jobs'),
    'default')
sched.start()


def print_reservation_id(reservation_id):
    print "====> Reservation id is " + str(reservation_id)


if __name__ == '__main__':

    print "====> Printing jobs..."
    sched.print_jobs()  # print_jobs() writes to stdout itself and returns None

    now = datetime.datetime.now()
    start_time = now + datetime.timedelta(seconds=3)
    later = now + datetime.timedelta(seconds=10)

    print "====> now is " + str(now)
    print "====> start_time is " + str(start_time)
    print "====> later is " + str(later)
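    # Hypothetical continuation (an assumption, not in the original source):
    # feed the computed times to one-shot date jobs via the APScheduler 2.x
    # add_date_job API; the args values are placeholder reservation ids.
    sched.add_date_job(print_reservation_id, start_time, args=[1])
    sched.add_date_job(print_reservation_id, later, args=[2])
    sleep(15)  # keep the process alive long enough for both jobs to fire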
Example #12
    def make_jobstore():
        if not SQLAlchemyJobStore:
            raise SkipTest

        return SQLAlchemyJobStore(url='sqlite:///example.sqlite')
Example #13
    def __init__(self):
        # `engine` and `metadata` are presumably module-level globals in the
        # original source; they are not defined in this snippet.
        SQLAlchemyJobStore.__init__(self, engine=engine, metadata=metadata, tablename="Schedule")
Example #14
from hotqueue import HotQueue

from apscheduler.job import Job
from apscheduler.jobstores.sqlalchemy_store import SQLAlchemyJobStore
from dateutil.tz import gettz
from datetime import datetime, timedelta
from apscheduler.triggers import IntervalTrigger, DateTrigger
from apscheduler.scripts import HttpScript, CommandScript


if __name__ == '__main__':

    queue = HotQueue('job_changes')
    script = HttpScript(url='http://baidu.comm')
    store = SQLAlchemyJobStore(url='sqlite:////tmp/task.db', tablename='tasks')
    #script = CommandScript(command='ping -c 3 www.baidu.com')
    local_tz = gettz('Asia/Chongqing')
    defaults = {'timezone': local_tz}
    trigger = IntervalTrigger(defaults, seconds=60)
    #trigger = DateTrigger(defaults, run_date=datetime(2013,12,11, 8, 11))

    job = Job(name=u'BaiduCurlWithWrongUrl', script=script, trigger=trigger)

    #print job.run()
    now = datetime.now(local_tz)
    next_run_time = job.compute_next_run_time(now)
    print job.get_run_times(now + timedelta(seconds=60))

    if next_run_time:
        print "add job"
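        # Presumably (an assumption based on Example #9 above, not part of
        # this snippet) the new job would then be persisted and the running
        # scheduler notified through the change queue:
        store.add_job(job)
        queue.put({'job_id': job.id, 'opt_type': 'add'})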