Example #1
    def __init__(self, dag: DAG):
        self.dag_id = dag.dag_id
        self.fileloc = dag.full_filepath
        self.fileloc_hash = DagCode.dag_fileloc_hash(self.fileloc)
        self.data = SerializedDAG.to_dict(dag)
        self.last_updated = timezone.utcnow()
        self.dag_hash = hashlib.md5(
            json.dumps(self.data, sort_keys=True).encode("utf-8")).hexdigest()
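The dag_hash above is an MD5 digest of the canonical JSON form of the serialized DAG. The sort_keys=True argument is what makes the digest deterministic; a standalone illustration (plain Python, not Airflow code):

import hashlib
import json

# Canonical JSON: sort_keys=True fixes the key order, so the byte stream
# and therefore the MD5 digest are stable across insertion orders.
h1 = hashlib.md5(json.dumps({"b": 2, "a": 1}, sort_keys=True).encode("utf-8")).hexdigest()
h2 = hashlib.md5(json.dumps({"a": 1, "b": 2}, sort_keys=True).encode("utf-8")).hexdigest()
assert h1 == h2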
Example #2
def upgrade():
    """Apply add source code table"""
    op.create_table(
        'dag_code',  # pylint: disable=no-member
        sa.Column('fileloc_hash',
                  sa.BigInteger(),
                  nullable=False,
                  primary_key=True,
                  autoincrement=False),
        sa.Column('fileloc', sa.String(length=2000), nullable=False),
        sa.Column('source_code', sa.UnicodeText(), nullable=False),
        sa.Column('last_updated', sa.TIMESTAMP(timezone=True), nullable=False))

    conn = op.get_bind()
    if conn.dialect.name != 'sqlite':
        op.drop_index('idx_fileloc_hash', 'serialized_dag')
        op.alter_column(table_name='serialized_dag',
                        column_name='fileloc_hash',
                        type_=sa.BigInteger(),
                        nullable=False)
        op.create_index(  # pylint: disable=no-member
            'idx_fileloc_hash', 'serialized_dag', ['fileloc_hash'])

    sessionmaker = sa.orm.sessionmaker()
    session = sessionmaker(bind=conn)
    serialized_dags = session.query(SerializedDagModel).all()
    for dag in serialized_dags:
        dag.fileloc_hash = DagCode.dag_fileloc_hash(dag.fileloc)
        session.merge(dag)
    session.commit()
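The matching downgrade for this migration would drop the new table; a minimal sketch (the real revision may also revert the fileloc_hash column type on serialized_dag):

def downgrade():
    """Unapply add source code table."""
    op.drop_table('dag_code')  # pylint: disable=no-member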
Example #3
    def delete_workflow(self, project_name: Text,
                        workflow_name: Text) -> Optional[WorkflowInfo]:
        dag_id = self.airflow_dag_id(project_name, workflow_name)
        if not self.dag_exist(dag_id):
            return None
        deploy_path = self.config.properties().get('airflow_deploy_path')
        if deploy_path is None:
            raise Exception("airflow_deploy_path config not set!")
        airflow_file_path = os.path.join(deploy_path, dag_id + '.py')
        if os.path.exists(airflow_file_path):
            os.remove(airflow_file_path)

        # stop all workflow executions
        self.kill_all_workflow_execution(project_name, workflow_name)

        # clean db meta
        with create_session() as session:
            dag = session.query(DagModel).filter(
                DagModel.dag_id == dag_id).first()
            session.query(DagTag).filter(DagTag.dag_id == dag_id).delete()
            session.query(DagModel).filter(DagModel.dag_id == dag_id).delete()
            if dag is not None:
                # Guard the fileloc lookup: the DagModel row can be absent
                # even when dag_exist() returned True, and dag.fileloc would
                # raise AttributeError on None.
                session.query(DagCode).filter(
                    DagCode.fileloc_hash == DagCode.dag_fileloc_hash(
                        dag.fileloc)).delete()
            session.query(SerializedDagModel).filter(
                SerializedDagModel.dag_id == dag_id).delete()
            session.query(DagRun).filter(DagRun.dag_id == dag_id).delete()
            session.query(TaskState).filter(
                TaskState.dag_id == dag_id).delete()
            session.query(TaskInstance).filter(
                TaskInstance.dag_id == dag_id).delete()
            session.query(TaskExecution).filter(
                TaskExecution.dag_id == dag_id).delete()
        return WorkflowInfo(namespace=project_name,
                            workflow_name=workflow_name)
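A hypothetical call site for the method above; the scheduler object and the project/workflow names are assumed for illustration and do not come from the snippet:

# `scheduler` is assumed to be an instance of the class that defines
# delete_workflow above.
info = scheduler.delete_workflow(project_name='my_project',
                                 workflow_name='daily_etl')
if info is None:
    print('workflow not found; nothing was deleted')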
Example #4
    def __init__(self, dag: DAG):
        self.dag_id = dag.dag_id
        self.fileloc = dag.full_filepath
        self.fileloc_hash = DagCode.dag_fileloc_hash(self.fileloc)
        self.data = SerializedDAG.to_dict(dag)
        self.last_updated = timezone.utcnow()
        dag_event_deps = DagEventDependencies(dag)
        self.event_relationships = DagEventDependencies.to_json(dag_event_deps)
        self.dag_hash = hashlib.md5(
            json.dumps(self.data, sort_keys=True).encode("utf-8")).hexdigest()
Example #5
    def _compare_example_dags(self, example_dags):
        with create_session() as session:
            for dag in example_dags.values():
                self.assertTrue(DagCode.has_dag(dag.fileloc))
                dag_fileloc_hash = DagCode.dag_fileloc_hash(dag.fileloc)
                result = session.query(
                    DagCode.fileloc, DagCode.fileloc_hash, DagCode.source_code) \
                    .filter(DagCode.fileloc == dag.fileloc) \
                    .filter(DagCode.fileloc_hash == dag_fileloc_hash) \
                    .one()

                self.assertEqual(result.fileloc, dag.fileloc)
                with open_maybe_zipped(dag.fileloc, 'r') as source:
                    source_code = source.read()
                self.assertEqual(result.source_code, source_code)
Example #6
def upgrade():
    """Create DagCode Table."""
    from sqlalchemy.ext.declarative import declarative_base

    Base = declarative_base()

    class SerializedDagModel(Base):
        __tablename__ = 'serialized_dag'

        # There are other columns here, but these are the only ones we need for the SELECT/UPDATE we are doing
        dag_id = sa.Column(sa.String(250), primary_key=True)
        fileloc = sa.Column(sa.String(2000), nullable=False)
        fileloc_hash = sa.Column(sa.BigInteger, nullable=False)

    """Apply add source code table"""
    op.create_table(
        'dag_code',  # pylint: disable=no-member
        sa.Column('fileloc_hash',
                  sa.BigInteger(),
                  nullable=False,
                  primary_key=True,
                  autoincrement=False),
        sa.Column('fileloc', sa.String(length=2000), nullable=False),
        sa.Column('source_code', sa.UnicodeText(), nullable=False),
        sa.Column('last_updated', sa.TIMESTAMP(timezone=True), nullable=False),
    )

    conn = op.get_bind()
    if conn.dialect.name != 'sqlite':
        if conn.dialect.name == "mssql":
            op.drop_index('idx_fileloc_hash', 'serialized_dag')

        op.alter_column(table_name='serialized_dag',
                        column_name='fileloc_hash',
                        type_=sa.BigInteger(),
                        nullable=False)
        if conn.dialect.name == "mssql":
            op.create_index('idx_fileloc_hash', 'serialized_dag',
                            ['fileloc_hash'])

    sessionmaker = sa.orm.sessionmaker()
    session = sessionmaker(bind=conn)
    serialized_dags = session.query(SerializedDagModel).all()
    for dag in serialized_dags:
        dag.fileloc_hash = DagCode.dag_fileloc_hash(dag.fileloc)
        session.merge(dag)
    session.commit()
Example #7
    def remove_deleted_dags(cls, alive_dag_filelocs: List[str], session=None):
        """Deletes DAGs not included in alive_dag_filelocs.

        :param alive_dag_filelocs: file paths of alive DAGs
        :param session: ORM Session
        """
        alive_fileloc_hashes = [
            DagCode.dag_fileloc_hash(fileloc) for fileloc in alive_dag_filelocs
        ]

        log.debug(
            "Deleting Serialized DAGs (for which DAG files are deleted) from %s table ",
            cls.__tablename__)

        session.execute(cls.__table__.delete().where(
            and_(cls.fileloc_hash.notin_(alive_fileloc_hashes),
                 cls.fileloc.notin_(alive_dag_filelocs))))
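A sketch of a scheduler-side caller, assuming the session=None default means the method is wrapped with Airflow's @provide_session so it can be invoked without an explicit session:

from airflow.models import DagBag
from airflow.models.serialized_dag import SerializedDagModel

# Collect the file locations of every DAG still present on disk; serialized
# rows whose fileloc and fileloc hash are not in this list are deleted.
dagbag = DagBag()
alive_filelocs = [dag.fileloc for dag in dagbag.dags.values()]
SerializedDagModel.remove_deleted_dags(alive_filelocs)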
Example #8
    def _compare_example_dags(self, example_dags):
        with create_session() as session:
            for dag in example_dags.values():
                if dag.is_subdag:
                    dag.fileloc = dag.parent_dag.fileloc
                assert DagCode.has_dag(dag.fileloc)
                dag_fileloc_hash = DagCode.dag_fileloc_hash(dag.fileloc)
                result = (
                    session.query(DagCode.fileloc, DagCode.fileloc_hash, DagCode.source_code)
                    .filter(DagCode.fileloc == dag.fileloc)
                    .filter(DagCode.fileloc_hash == dag_fileloc_hash)
                    .one()
                )

                assert result.fileloc == dag.fileloc
                with open_maybe_zipped(dag.fileloc, 'r') as source:
                    source_code = source.read()
                assert result.source_code == source_code
Example #9
    def __init__(self, dag: DAG):
        self.dag_id = dag.dag_id
        self.fileloc = dag.fileloc
        self.fileloc_hash = DagCode.dag_fileloc_hash(self.fileloc)
        self.last_updated = timezone.utcnow()

        dag_data = SerializedDAG.to_dict(dag)
        dag_data_json = json.dumps(dag_data, sort_keys=True).encode("utf-8")

        self.dag_hash = hashlib.md5(dag_data_json).hexdigest()

        if COMPRESS_SERIALIZED_DAGS:
            self._data = None
            self._data_compressed = zlib.compress(dag_data_json)
        else:
            self._data = dag_data
            self._data_compressed = None

        # Serves as a cache so the data field can be accessed without
        # decompressing and re-loading when COMPRESS_SERIALIZED_DAGS is True.
        self.__data_cache = dag_data
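For context, a simplified sketch of the matching read path in the same class, modeled on Airflow's accessor. The hasattr check matters because instances hydrated by SQLAlchemy bypass __init__ and therefore lack the name-mangled cache attribute:

    @property
    def data(self):
        # Rebuild the cache on first access for DB-loaded rows, then prefer
        # the compressed blob or fall back to the uncompressed column.
        if not hasattr(self, '_SerializedDagModel__data_cache') or self.__data_cache is None:
            if self._data_compressed is not None:
                self.__data_cache = json.loads(zlib.decompress(self._data_compressed))
            else:
                self.__data_cache = self._data
        return self.__data_cache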
Example #10
    def test_dag_fileloc_hash(self):
        """Verifies the correctness of hashing file path."""
        self.assertEqual(DagCode.dag_fileloc_hash('/airflow/dags/test_dag.py'),
                         33826252060516589)
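The expected value in this test follows from the hash itself; a self-contained sketch mirroring Airflow's DagCode.dag_fileloc_hash as of these snippets:

import hashlib
import struct

def dag_fileloc_hash(full_filepath: str) -> int:
    # Take the last 8 bytes of the SHA-1 digest as a big-endian unsigned
    # integer, then shift right by 8 bits so the result fits a signed
    # BigInteger column (MySQL BIGINT holds 8 signed bytes; 7 are used).
    return struct.unpack('>Q', hashlib.sha1(full_filepath.encode('utf-8')).digest()[-8:])[0] >> 8

assert dag_fileloc_hash('/airflow/dags/test_dag.py') == 33826252060516589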
Example #11
    def __init__(self, dag):
        self.dag_id = dag.dag_id
        self.fileloc = dag.full_filepath
        self.fileloc_hash = DagCode.dag_fileloc_hash(self.fileloc)
        self.data = SerializedDAG.to_dict(dag)
        self.last_updated = timezone.utcnow()
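A hypothetical way to persist the model built by this constructor. Recent Airflow versions expose SerializedDagModel.write_dag for this; the manual merge below is an equivalent upsert sketch (note that create_session lives in airflow.utils.db on the 1.10 line this full_filepath-era snippet targets):

from airflow.models.serialized_dag import SerializedDagModel
from airflow.utils.session import create_session

# `dag` is assumed to be an existing airflow.models.DAG instance.
with create_session() as session:
    session.merge(SerializedDagModel(dag))  # upsert keyed on the dag_id PK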