def run(args, dag=None):
    db_utils.pessimistic_connection_handling()
    if dag:
        args.dag_id = dag.dag_id

    # Setting up logging
    log_base = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log_base + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())

    logging.root.handlers = []
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)

    if not args.pickle and not dag:
        dag = get_dag(args)
    elif not dag:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
    task = dag.get_task(task_id=args.task_id)

    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            pool=args.pool)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            job_id=args.job_id,
            pool=args.pool,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e
        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            force=args.force,
            pool=args.pool)
        executor.heartbeat()
        executor.end()

    # Force the log to flush, and set the handler to go back to normal so we
    # don't continue logging to the task's log file. The flush is important
    # because we subsequently read from the log to insert into S3 or Google
    # cloud storage.
    logging.root.handlers[0].flush()
    logging.root.handlers = []

    # store logs remotely
    remote_base = conf.get('core', 'REMOTE_BASE_LOG_FOLDER')

    # deprecated as of March 2016
    if not remote_base and conf.get('core', 'S3_LOG_FOLDER'):
        warnings.warn(
            'The S3_LOG_FOLDER conf key has been replaced by '
            'REMOTE_BASE_LOG_FOLDER. Your conf still works but please '
            'update airflow.cfg to ensure future compatibility.',
            DeprecationWarning)
        remote_base = conf.get('core', 'S3_LOG_FOLDER')

    if os.path.exists(filename):
        # read log and remove old logs to get just the latest additions
        with open(filename, 'r') as logfile:
            log = logfile.read()

        remote_log_location = filename.replace(log_base, remote_base)
        # S3
        if remote_base.startswith('s3:/'):
            logging_utils.S3Log().write(log, remote_log_location)
        # GCS
        elif remote_base.startswith('gs:/'):
            logging_utils.GCSLog().write(
                log,
                remote_log_location,
                append=True)
        # Other
        elif remote_base and remote_base != 'None':
            logging.error(
                'Unsupported remote log location: {}'.format(remote_base))
def _execute(self):
    dag_id = self.dag_id

    pessimistic_connection_handling()

    logging.basicConfig(level=logging.DEBUG)
    self.logger.info("Starting the scheduler")

    dagbag = models.DagBag(self.subdir, sync_to_db=True)
    executor = self.executor = dagbag.executor
    executor.start()
    i = 0
    while not self.num_runs or self.num_runs > i:
        try:
            loop_start_dttm = datetime.now()
            try:
                self.process_events(executor=executor, dagbag=dagbag)
                self.prioritize_queued(executor=executor, dagbag=dagbag)
            except Exception as e:
                self.logger.exception(e)

            i += 1
            try:
                if i % self.refresh_dags_every == 0:
                    dagbag = models.DagBag(self.subdir, sync_to_db=True)
                else:
                    dagbag.collect_dags(only_if_updated=True)
            except:
                self.logger.error("Failed at reloading the dagbag")
                Stats.incr('dag_refresh_error', 1, 1)
                sleep(5)

            if dag_id:
                dags = [dagbag.dags[dag_id]]
            else:
                dags = [
                    dag for dag in dagbag.dags.values()
                    if not dag.parent_dag]
            paused_dag_ids = dagbag.paused_dags()
            for dag in dags:
                self.logger.debug("Scheduling {}".format(dag.dag_id))
                dag = dagbag.get_dag(dag.dag_id)
                if not dag or (dag.dag_id in paused_dag_ids):
                    continue
                try:
                    self.schedule_dag(dag)
                    self.process_dag(dag, executor)
                    self.manage_slas(dag)
                except Exception as e:
                    self.logger.exception(e)
            self.logger.info("Done queuing tasks, calling the executor's "
                             "heartbeat")
            duration_sec = (datetime.now() - loop_start_dttm).total_seconds()
            self.logger.info("Loop took: {} seconds".format(duration_sec))
            try:
                self.import_errors(dagbag)
            except Exception as e:
                self.logger.exception(e)
            try:
                dagbag.kill_zombies()
            except Exception as e:
                self.logger.exception(e)
            try:
                # We really just want the scheduler to never ever stop.
                executor.heartbeat()
                self.heartbeat()
            except Exception as e:
                self.logger.exception(e)
                self.logger.error("Tachycardia!")
        except Exception as deep_e:
            self.logger.exception(deep_e)
            raise
        finally:
            settings.Session.remove()
    executor.end()
def _execute(self):
    dag_id = self.dag_id

    def signal_handler(signum, frame):
        self.logger.error("SIGINT (ctrl-c) received")
        sys.exit(1)
    signal.signal(signal.SIGINT, signal_handler)

    pessimistic_connection_handling()

    logging.basicConfig(level=logging.DEBUG)
    self.logger.info("Starting the scheduler")

    dagbag = models.DagBag(self.subdir, sync_to_db=True)
    executor = dagbag.executor
    executor.start()
    i = 0
    while not self.num_runs or self.num_runs > i:
        try:
            loop_start_dttm = datetime.now()
            try:
                self.prioritize_queued(executor=executor, dagbag=dagbag)
            except Exception as e:
                self.logger.exception(e)

            i += 1
            try:
                if i % self.refresh_dags_every == 0:
                    dagbag = models.DagBag(self.subdir, sync_to_db=True)
                else:
                    dagbag.collect_dags(only_if_updated=True)
            except:
                self.logger.error("Failed at reloading the dagbag")
                Stats.incr('dag_refresh_error', 1, 1)
                sleep(5)

            if dag_id:
                dags = [dagbag.dags[dag_id]]
            else:
                dags = [
                    dag for dag in dagbag.dags.values()
                    if not dag.parent_dag]
            paused_dag_ids = dagbag.paused_dags()
            for dag in dags:
                self.logger.debug("Scheduling {}".format(dag.dag_id))
                dag = dagbag.get_dag(dag.dag_id)
                if not dag or (dag.dag_id in paused_dag_ids):
                    continue
                try:
                    self.schedule_dag(dag)
                    self.process_dag(dag, executor)
                    self.manage_slas(dag)
                except Exception as e:
                    self.logger.exception(e)
            self.logger.info("Done queuing tasks, calling the executor's "
                             "heartbeat")
            duration_sec = (datetime.now() - loop_start_dttm).total_seconds()
            self.logger.info("Loop took: {} seconds".format(duration_sec))
            try:
                self.import_errors(dagbag)
            except Exception as e:
                self.logger.exception(e)
            try:
                dagbag.kill_zombies()
            except Exception as e:
                self.logger.exception(e)
            try:
                # We really just want the scheduler to never ever stop.
                executor.heartbeat()
                self.heartbeat()
            except Exception as e:
                self.logger.exception(e)
                self.logger.error("Tachycardia!")
        except Exception as deep_e:
            self.logger.exception(deep_e)
        finally:
            settings.Session.remove()
    executor.end()
def _execute(self):
    TI = models.TaskInstance

    pessimistic_connection_handling()

    logging.basicConfig(level=logging.DEBUG)
    self.logger.info("Starting the scheduler")

    dagbag = models.DagBag(self.subdir, sync_to_db=True)
    executor = self.executor = dagbag.executor
    executor.start()
    self.runs = 0
    while not self.num_runs or self.num_runs > self.runs:
        try:
            loop_start_dttm = datetime.now()
            try:
                self.prioritize_queued(executor=executor, dagbag=dagbag)
            except Exception as e:
                self.logger.exception(e)

            self.runs += 1
            try:
                if self.runs % self.refresh_dags_every == 0:
                    dagbag = models.DagBag(self.subdir, sync_to_db=True)
                else:
                    dagbag.collect_dags(only_if_updated=True)
            except Exception as e:
                self.logger.error("Failed at reloading the dagbag. {}".format(e))
                Stats.incr('dag_refresh_error', 1, 1)
                sleep(5)

            if len(self.dag_ids) > 0:
                dags = [dag for dag in dagbag.dags.values()
                        if dag.dag_id in self.dag_ids]
            else:
                dags = [
                    dag for dag in dagbag.dags.values()
                    if not dag.parent_dag]

            paused_dag_ids = dagbag.paused_dags()
            dags = [x for x in dags if x.dag_id not in paused_dag_ids]
            # dags = filter(lambda x: x.dag_id not in paused_dag_ids, dags)

            self.logger.debug("Total Cores: {} Max Threads: {} DAGs:{}".format(
                multiprocessing.cpu_count(), self.max_threads, len(dags)))
            dags = self._split(dags, math.ceil(len(dags) / self.max_threads))
            tis_q = multiprocessing.Queue()
            jobs = [multiprocessing.Process(target=self._do_dags,
                                            args=(dagbag, dags[i], tis_q))
                    for i in range(len(dags))]

            self.logger.info("Starting {} scheduler jobs".format(len(jobs)))
            for j in jobs:
                j.start()

            while any(j.is_alive() for j in jobs):
                while not tis_q.empty():
                    ti_key, pickle_id = tis_q.get()
                    dag = dagbag.dags[ti_key[0]]
                    task = dag.get_task(ti_key[1])
                    ti = TI(task, ti_key[2])
                    self.executor.queue_task_instance(ti, pickle_id=pickle_id)

            for j in jobs:
                j.join()

            self.logger.info("Done queuing tasks, calling the executor's "
                             "heartbeat")
            duration_sec = (datetime.now() - loop_start_dttm).total_seconds()
            self.logger.info("Loop took: {} seconds".format(duration_sec))
            Stats.timing("scheduler_loop", duration_sec * 1000)
            try:
                self.import_errors(dagbag)
            except Exception as e:
                self.logger.exception(e)
            try:
                dagbag.kill_zombies()
            except Exception as e:
                self.logger.exception(e)
            try:
                # We really just want the scheduler to never ever stop.
                executor.heartbeat()
                self.heartbeat()
            except Exception as e:
                self.logger.exception(e)
                self.logger.error("Tachycardia!")
        except Exception as deep_e:
            self.logger.exception(deep_e)
            raise
        finally:
            settings.Session.remove()
    executor.end()
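
# The multiprocessing variant above hands each worker process a chunk of DAGs
# via self._do_dags, which is not shown in this excerpt. Below is a minimal
# sketch, assuming it reuses the per-DAG loop of the single-process versions
# (schedule_dag / process_dag / manage_slas) and that process_dag writes queued
# task-instance keys to the shared queue; the exact signature is an assumption,
# not the canonical implementation.
def _do_dags(self, dagbag, dags, tis_out):
    """Schedules and processes the given chunk of DAGs in a worker process."""
    for dag in dags:
        self.logger.debug("Scheduling {}".format(dag.dag_id))
        dag = dagbag.get_dag(dag.dag_id)
        if not dag:
            continue
        try:
            self.schedule_dag(dag)
            # Assumed: process_dag pushes (ti_key, pickle_id) tuples onto tis_out
            self.process_dag(dag, tis_out)
            self.manage_slas(dag)
        except Exception as e:
            self.logger.exception(e)
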
def _execute(self):
    TI = models.TaskInstance

    pessimistic_connection_handling()

    logging.basicConfig(level=logging.DEBUG)
    self.logger.info("Starting the scheduler")

    dagbag = models.DagBag(self.subdir, sync_to_db=True)
    executor = self.executor = dagbag.executor
    executor.start()
    self.runs = 0
    while not self.num_runs or self.num_runs > self.runs:
        try:
            loop_start_dttm = datetime.now()
            try:
                self.prioritize_queued(executor=executor, dagbag=dagbag)
            except Exception as e:
                self.logger.exception(e)

            self.runs += 1
            try:
                if self.runs % self.refresh_dags_every == 0:
                    dagbag = models.DagBag(self.subdir, sync_to_db=True)
                else:
                    dagbag.collect_dags(only_if_updated=True)
            except Exception as e:
                self.logger.error("Failed at reloading the dagbag. {}".format(e))
                Stats.incr('dag_refresh_error', 1, 1)
                sleep(5)

            if len(self.dag_ids) > 0:
                dags = [dag for dag in dagbag.dags.values()
                        if dag.dag_id in self.dag_ids]
            else:
                dags = [
                    dag for dag in dagbag.dags.values()
                    if not dag.parent_dag]

            paused_dag_ids = dagbag.paused_dags()
            dags = [x for x in dags if x.dag_id not in paused_dag_ids]
            # dags = filter(lambda x: x.dag_id not in paused_dag_ids, dags)

            self.logger.debug("Total Cores: {} Max Threads: {} DAGs:{}".format(
                multiprocessing.cpu_count(), self.max_threads, len(dags)))
            dags = self._split_dags(dags, math.ceil(len(dags) / self.max_threads))
            tis_q = multiprocessing.Queue()
            jobs = [multiprocessing.Process(target=self._do_dags,
                                            args=(dagbag, dags[i], tis_q))
                    for i in range(len(dags))]

            self.logger.info("Starting {} scheduler jobs".format(len(jobs)))
            for j in jobs:
                j.start()

            while any(j.is_alive() for j in jobs):
                while not tis_q.empty():
                    ti_key, pickle_id = tis_q.get()
                    dag = dagbag.dags[ti_key[0]]
                    task = dag.get_task(ti_key[1])
                    ti = TI(task, ti_key[2])
                    self.executor.queue_task_instance(ti, pickle_id=pickle_id)

            for j in jobs:
                j.join()

            self.logger.info("Done queuing tasks, calling the executor's "
                             "heartbeat")
            duration_sec = (datetime.now() - loop_start_dttm).total_seconds()
            self.logger.info("Loop took: {} seconds".format(duration_sec))
            try:
                self.import_errors(dagbag)
            except Exception as e:
                self.logger.exception(e)
            try:
                dagbag.kill_zombies()
            except Exception as e:
                self.logger.exception(e)
            try:
                # We really just want the scheduler to never ever stop.
                executor.heartbeat()
                self.heartbeat()
            except Exception as e:
                self.logger.exception(e)
                self.logger.error("Tachycardia!")
        except Exception as deep_e:
            self.logger.exception(deep_e)
            raise
        finally:
            settings.Session.remove()
    executor.end()
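
# Both multiprocessing variants chunk the DAG list before fanning out to
# worker processes (self._split / self._split_dags), but the helper itself is
# not included in this excerpt. Below is a minimal sketch of such a chunking
# helper; the body is an assumption for illustration, not the canonical
# implementation.
@staticmethod
def _split_dags(dags, size):
    """Splits a list of dags into chunks of at most `size` items.

    _split_dags([1, 2, 3, 4, 5, 6], 3) -> [[1, 2, 3], [4, 5, 6]]
    """
    size = max(1, int(size))
    return [dags[i:i + size] for i in range(0, len(dags), size)]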