def _cleanup_stale_dags(self):
    """
    Deactivate and purge DAGs that have not been loaded recently.

    Runs at most once per ``self._dag_cleanup_interval`` seconds (a
    non-positive interval disables cleanup entirely). The cleanup has
    two parts:

    1. Mark DAGs not seen since the cutoff as inactive.
    2. Remove serialized-DAG rows (and, when DAG code storage is
       enabled, orphaned code rows) for DAGs not seen since the cutoff.
    """
    # Throttle: skip when disabled or when the interval has not elapsed.
    if self._dag_cleanup_interval <= 0:
        return
    since_last = (timezone.utcnow() - self.last_dag_cleanup_time).total_seconds()
    if since_last <= self._dag_cleanup_interval:
        return

    # In the worst case every DAG should have been processed within
    # file_process_interval + processor_timeout
    # + min_serialized_dag_update_interval, so anything last seen
    # before that window is considered stale.
    grace_period = (
        self._processor_timeout
        + timedelta(seconds=self._file_process_interval)
        + timedelta(seconds=self._min_serialized_dag_update_interval)
    )
    cutoff = timezone.utcnow() - grace_period

    self.log.info(
        "Deactivating DAGs that haven't been touched since %s",
        cutoff.isoformat(),
    )
    airflow.models.DAG.deactivate_stale_dags(cutoff)

    if STORE_SERIALIZED_DAGS:
        from airflow.models.serialized_dag import SerializedDagModel
        SerializedDagModel.remove_stale_dags(cutoff)

    if self.store_dag_code:
        from airflow.models.dagcode import DagCode
        DagCode.remove_unused_code()

    self.last_dag_cleanup_time = timezone.utcnow()
def test_remove_unused_code(self):
    """Orphaned DagCode rows are deleted by ``remove_unused_code``."""
    example_dags = make_example_dags(example_dags_module)
    self._write_example_dags()
    bash_dag = example_dags['example_bash_operator']

    with create_session() as session:
        # Delete every ORM row referencing the bash DAG so that its
        # DagCode entry becomes unreferenced ("unused").
        for model in models.base.Base._decl_class_registry.values():  # pylint: disable=protected-access
            if not hasattr(model, "dag_id"):
                continue
            session.query(model) \
                .filter(model.dag_id == bash_dag.dag_id) \
                .delete(synchronize_session='fetch')

        code_rows = session.query(DagCode).filter(
            DagCode.fileloc == bash_dag.fileloc)

        # The code row survives the dag_id-based deletes above
        # (DagCode is keyed by fileloc, not dag_id)...
        self.assertEqual(code_rows.count(), 1)

        DagCode.remove_unused_code()

        # ...and is gone once unused code is purged.
        self.assertEqual(code_rows.count(), 0)