Exemplo n.º 1
0
def _get_connection(
    dburi=None,
    cl_properties=None,
    config_properties=None,
    submit_dir=None,
    db_type=None,
    pegasus_version=None,
    schema_check=True,
    create=False,
    force=False,
    print_version=True,
):
    """Open a database connection from the most specific source given.

    Sources are tried in this order: an explicit ``dburi``, a ``submit_dir``,
    properties (``config_properties`` is truthy or
    ``_has_connection_properties(cl_properties)`` is true), and finally the
    master database URI when no ``db_type`` was requested.

    Returns the value of the selected ``connection.connect*`` helper, or
    ``None`` when none of the sources apply.
    """
    # Options forwarded unchanged to whichever connect helper is selected.
    shared = {
        "pegasus_version": pegasus_version,
        "schema_check": schema_check,
        "create": create,
        "force": force,
        "print_version": print_version,
    }

    if dburi:
        return connection.connect(dburi, db_type=db_type, **shared)

    if submit_dir:
        return connection.connect_by_submitdir(
            submit_dir,
            db_type,
            config_properties,
            cl_properties=cl_properties,
            **shared
        )

    if config_properties or _has_connection_properties(cl_properties):
        return connection.connect_by_properties(
            config_properties, db_type, cl_properties=cl_properties, **shared
        )

    # A specific database type was requested but nothing tells us where it is.
    if db_type:
        return None

    # Nothing was specified at all: fall back to the master database.
    master_uri = connection._get_master_uri()
    return connection.connect(master_uri, db_type=db_type, **shared)
Exemplo n.º 2
0
 def loop_forever(self):
     """Call ``loop_once`` repeatedly, sleeping ``self.interval`` between passes.

     Each pass opens a new session on the current user's master database
     and closes it again, even when ``loop_once`` raises.
     """
     while True:
         current_user = user.get_user_by_uid(os.getuid())
         master_url = current_user.get_master_db_url()
         session = connection.connect(
             master_url, connect_args={"check_same_thread": False}
         )
         try:
             self.loop_once(Ensembles(session))
         finally:
             session.close()
         time.sleep(self.interval)
Exemplo n.º 3
0
    def delete(self):
        """Delete this submit dir and its entry in the master db.

        Asks for confirmation on stdin first and keeps asking until the
        answer is "y" or "n"; answering "n" returns without changing anything.

        Raises:
            SubmitDirException: if this submit dir belongs to a subworkflow.
        """
        # Verify that we aren't trying to delete a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException(
                "Subworkflows cannot be deleted independent of the root workflow"
            )

        # Pick the prompt function once, under its own name. Assigning to
        # ``input`` inside this method would make ``input`` a local name, and
        # on Python 3 (no ``raw_input``) the later call would then fail with
        # UnboundLocalError instead of falling back to the builtin.
        try:
            ask = raw_input  # Python 2
        except NameError:
            ask = input  # Python 3

        # Confirm that they want to delete the workflow
        while True:
            answer = (
                ask(
                    "Are you sure you want to delete this workflow? This operation cannot be undone. [y/n]: "
                )
                .strip()
                .lower()
            )
            if answer == "y":
                break
            if answer == "n":
                return

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(
            self.submitdir, connection.DBType.MASTER
        )
        try:
            mdb = MasterDatabase(mdbsession)

            # Delete all of the records from the workflow db if they are not
            # using an sqlite db that is in the submit dir.
            db_url = connection.url_by_submitdir(
                self.submitdir, connection.DBType.WORKFLOW
            )
            if self.submitdir not in db_url:
                dbsession = connection.connect(db_url)
                try:
                    db = WorkflowDatabase(dbsession)
                    db.delete_workflow(self.wf_uuid)
                    dbsession.commit()
                finally:
                    # Always release the workflow db session, even on failure
                    dbsession.close()

            # Delete the workflow
            mdb.delete_master_workflow(self.wf_uuid)

            # Remove all the files
            shutil.rmtree(self.submitdir)

            # Update master db only after the files are gone
            mdbsession.commit()
        finally:
            # Always release the master db session, even on failure
            mdbsession.close()
Exemplo n.º 4
0
    def __init__(
        self,
        dburi,
        batch=True,
        props=None,
        db_type=None,
        backup=False,
        flush_every=1000,
    ):
        """Connect to ``dburi`` and initialise the batching state.

        Will be overridden by subclasses to take parameters specific to
        their function.

        :param dburi: database URI passed to ``connection.connect``
        :param batch: stored as ``self._batch``
        :param props: optional properties object exposing ``keyset()`` and
            ``property(key)``; every entry except ``url`` is forwarded as a
            connect argument. ``None`` means no extra connect arguments.
        :param db_type: forwarded to ``connection.connect``
        :param backup: forwarded to ``connection.connect``
        :param flush_every: stored as ``self._flush_every``
        """
        self.log = logging.getLogger(
            "{}.{}".format(self.__module__, self.__class__.__name__)
        )
        self.dburi = dburi

        # PM-898 all props passed should have pegasus prefix stripped off
        # so they are more like connect_args to be used for database
        connect_args = {}
        # props defaults to None, so only walk it when one was supplied
        if props is not None:
            for key in props.keyset():
                # we don't pass url in connect args
                if key != "url":
                    connect_args[key] = props.property(key)

        # make sure timeout is an int
        if "timeout" in connect_args:
            connect_args["timeout"] = int(connect_args["timeout"])

        self.session = connection.connect(
            dburi,
            create=True,
            connect_args=connect_args,
            db_type=db_type,
            backup=backup,
        )

        # flags and state for batching
        self._batch = batch
        self._flush_every = flush_every
        self._flush_count = 0
        self._last_flush = time.time()
Exemplo n.º 5
0
    def __init__(self, connString=None, expand_workflow=True):
        """Connect to the database at ``connString`` and reset query state.

        Raises:
            StampedeDBNotFoundError: if connecting raises
                ``connection.ConnectionError`` or ``DBAdminError``; the
                original error is logged first.
        """
        logger_name = "{}.{}".format(self.__module__, self.__class__.__name__)
        self.log = logging.getLogger(logger_name)

        try:
            self.session = connection.connect(connString)
        except (connection.ConnectionError, DBAdminError) as err:
            self.log.exception(err)
            raise StampedeDBNotFoundError

        self._expand = expand_workflow

        # Workflow bookkeeping starts out empty.
        self._root_wf_id = []
        self._root_wf_uuid = []
        self._wfs = []
        self.all_workflows = None

        # Filter settings start out unset.
        self._job_filter_mode = None
        self._time_filter_mode = None
        self._host_filter = None
        self._xform_filter = {"include": None, "exclude": None}
Exemplo n.º 6
0
    def run(self):
        """Trigger manager main loop.

        On every pass, list all triggers and start or stop each one based on
        its state, then sleep for ``self.polling_rate``. The session opened
        for a pass is always closed before sleeping.
        """
        self.log.info("trigger manager starting")

        while True:
            # TODO: user will always be the same.., keep this in the loop or move it out
            current_user = user.get_user_by_uid(os.getuid())
            session = connection.connect(
                current_user.get_master_db_url(),
                connect_args={"check_same_thread": False},
            )

            try:
                self.trigger_dao = Triggers(session)
                self.ensemble_dao = Ensembles(session)

                pending = self.trigger_dao.list_triggers()
                self.log.info("processing {} triggers".format(len(pending)))

                for trigger in pending:
                    name = TriggerManager.get_tname(trigger)
                    state = trigger.state

                    if state == "READY":
                        self.start_trigger(trigger)
                    elif state == "RUNNING":
                        if name not in self.running:
                            # restart
                            self.log.debug(
                                "{} not in memory, restarting it".format(name)
                            )
                            self.start_trigger(trigger)
                        elif not self.running[name].is_alive():
                            # exited
                            self.log.debug(
                                "{} exited, removing references to it".format(name)
                            )
                            self.stop_trigger(trigger)
                    elif state == "STOPPED":
                        self.stop_trigger(trigger)
            finally:
                session.close()

            time.sleep(self.polling_rate)
Exemplo n.º 7
0
def delete_workflow(dburi, wf_uuid):
    """Expunge workflow from workflow database.

    Looks up the workflow with ``wf_uuid``, deletes the files of every
    workflow whose ``root_wf_id`` matches it, then deletes the workflow row
    and commits. Returns without deleting anything when no workflow matches.
    The session is always closed.

    :param dburi: URI of the workflow database
    :param wf_uuid: UUID of the workflow to expunge
    """
    log.info("Expunging %s from workflow database", wf_uuid)

    session = connection.connect(dburi, create=True)
    try:
        query = session.query(Workflow).filter(Workflow.wf_uuid == wf_uuid)
        try:
            wf = query.one()
        except orm.exc.NoResultFound:
            # Logger.warn is a deprecated alias; use Logger.warning
            log.warning(
                "No workflow found with wf_uuid %s - aborting expunge", wf_uuid
            )
            return

        # PM-1218 gather list of descendant workflows with wf_uuid
        query = session.query(Workflow).filter(Workflow.root_wf_id == wf.wf_id)
        try:
            for desc_wf in query.all():
                # delete the files from the rc_lfn explicitly as they are
                # not associated with workflow table
                __delete_workflow_files__(session, desc_wf.wf_uuid, desc_wf.wf_id)
        except orm.exc.NoResultFound:
            log.warning(
                "No workflow found with root wf_id %s - aborting expunge", wf.wf_id
            )
            return

        session.delete(wf)

        log.info("Flushing top-level workflow: %s", wf.wf_uuid)
        started = time.time()
        session.flush()
        session.commit()
        log.info("Flush took: %f seconds", time.time() - started)
    finally:
        session.close()
Exemplo n.º 8
0
def delete_dashboard_workflow(dburi, wf_uuid):
    """Expunge workflow from dashboard database.

    Deletes the ``MasterWorkflow`` row with ``wf_uuid`` and commits. Returns
    without deleting anything when no such row exists. The session is always
    closed.

    :param dburi: URI of the dashboard database
    :param wf_uuid: UUID of the workflow to expunge
    """
    log.info("Expunging %s from dashboard database", wf_uuid)

    session = connection.connect(dburi, create=True)
    try:
        query = session.query(MasterWorkflow).filter(
            MasterWorkflow.wf_uuid == wf_uuid
        )
        try:
            wf = query.one()
        except orm.exc.NoResultFound:
            # Logger.warn is a deprecated alias; use Logger.warning
            log.warning(
                "No workflow found with wf_uuid %s - aborting expunge", wf_uuid
            )
            return

        session.delete(wf)

        started = time.time()
        session.flush()
        session.commit()
        log.info("Flush took: %f seconds", time.time() - started)
    finally:
        session.close()
Exemplo n.º 9
0
def connect():
    """Store the current user's master db URL and a session for it on ``g``."""
    log.debug("Connecting to database")
    master_url = g.user.get_master_db_url()
    g.master_db_url = master_url
    g.session = connection.connect(
        master_url, connect_args={"check_same_thread": False}
    )
Exemplo n.º 10
0
    def detach(self, wf_uuid=None):
        """Remove any master db entries for the given root workflow.

        When the submit dir exists, the master db is located through it and
        the entry for ``self.wf_uuid`` is deleted. Otherwise the master db at
        ``~/.pegasus/workflow.db`` is used and ``wf_uuid`` must be supplied.

        :param wf_uuid: UUID of the workflow to detach; only used when the
            submit dir does not exist

        Raises:
            SubmitDirException: if the submit dir belongs to a subworkflow,
                or if the submit dir does not exist and no ``wf_uuid`` was
                given.
        """
        if self.submitdir_exists:
            # Verify that we aren't trying to detach a subworkflow
            if self.is_subworkflow():
                raise SubmitDirException(
                    "Subworkflows cannot be detached independent of the root workflow"
                )

            # Connect to master database
            mdbsession = connection.connect_by_submitdir(
                self.submitdir, connection.DBType.MASTER
            )
            try:
                mdb = MasterDatabase(mdbsession)

                # Check to see if it even exists
                wf = mdb.get_master_workflow(self.wf_uuid)
                if wf is None:
                    print("Workflow is not in master DB")
                else:
                    # Delete the workflow (this will delete the master_workflowstate entries as well)
                    mdb.delete_master_workflow(self.wf_uuid)

                # Update the master db
                mdbsession.commit()
            finally:
                # Release the session even when a step above fails
                mdbsession.close()
            return

        # The submit dir is gone: connect to the master database directly
        home = expanduser("~")
        mdbsession = connection.connect(
            "sqlite:///%s/.pegasus/workflow.db" % home,
            db_type=connection.DBType.MASTER,
        )
        try:
            mdb = MasterDatabase(mdbsession)

            if wf_uuid is not None:
                # Delete
                mdb.delete_master_workflow(wf_uuid, submit_dir=self.submitdir)

                # Update the master db
                mdbsession.commit()
                return

            # No UUID given: list the candidate workflows to help the caller
            wfs = mdb.get_master_workflow_for_submitdir(self.submitdir)
            if not wfs:
                raise SubmitDirException("Invalid submit dir: %s" % self.submitdir)

            msg = (
                "Invalid submit dir: %s, Specify --wf-uuid <WF_UUID> to detach\n"
                % self.submitdir
            )
            msg += "\tWorkflow UUID, DAX Label, Submit Hostname, Submit Dir.\n"
            for wf in wfs:
                msg += "\t{}, {}, {}, {}\n".format(
                    wf.wf_uuid,
                    wf.dax_label,
                    wf.submit_hostname,
                    wf.submit_dir,
                )
            raise SubmitDirException(msg)
        finally:
            mdbsession.close()
Exemplo n.º 11
0
    def attach(self):
        """Add a workflow to the master db.

        If the workflow is already in the master db, only its ``submit_dir``
        and ``db_url`` are refreshed when the path changed. Otherwise the
        workflow record and its states are copied from the workflow db into
        the master db. Both sessions are closed on every exit path.

        Raises:
            SubmitDirException: if this submit dir belongs to a subworkflow.
        """
        # Verify that we aren't trying to attach a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException(
                "Subworkflows cannot be attached independent of the root workflow"
            )

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(
            self.submitdir, connection.DBType.MASTER
        )
        try:
            mdb = MasterDatabase(mdbsession)

            # Check to see if it already exists and just update it
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is not None:
                print("Workflow is already in master db")
                if wf.submit_dir != self.submitdir:
                    print("Updating path...")
                    wf.submit_dir = self.submitdir
                    wf.db_url = connection.url_by_submitdir(
                        self.submitdir, connection.DBType.WORKFLOW
                    )
                    mdbsession.commit()
                return

            # Connect to workflow db
            db_url = connection.url_by_submitdir(
                self.submitdir, connection.DBType.WORKFLOW
            )
            dbsession = connection.connect(db_url)
            try:
                db = WorkflowDatabase(dbsession)

                # Get workflow record
                wf = db.get_workflow(self.wf_uuid)
                if wf is None:
                    # The finally blocks close both sessions on this path too
                    print("No database record for that workflow exists")
                    return

                # Update the workflow record
                wf.submit_dir = self.submitdir
                wf.db_url = db_url

                # Insert workflow record into master db
                mwf = MasterWorkflow()
                mwf.wf_uuid = wf.wf_uuid
                mwf.dax_label = wf.dax_label
                mwf.dax_version = wf.dax_version
                mwf.dax_file = wf.dax_file
                mwf.dag_file_name = wf.dag_file_name
                mwf.timestamp = wf.timestamp
                mwf.submit_hostname = wf.submit_hostname
                mwf.submit_dir = self.submitdir
                mwf.planner_arguments = wf.planner_arguments
                mwf.user = wf.user
                mwf.grid_dn = wf.grid_dn
                mwf.planner_version = wf.planner_version
                mwf.db_url = wf.db_url
                mwf.archived = self.is_archived()
                mdbsession.add(mwf)
                mdbsession.flush()  # We should have the new wf_id after this

                # Query states from workflow database
                states = db.get_workflow_states(wf.wf_id)

                # Insert states into master db
                for s in states:
                    ms = MasterWorkflowstate()
                    ms.wf_id = mwf.wf_id
                    ms.state = s.state
                    ms.timestamp = s.timestamp
                    ms.restart_count = s.restart_count
                    ms.status = s.status
                    mdbsession.add(ms)
                mdbsession.flush()

                dbsession.commit()
            finally:
                dbsession.close()

            mdbsession.commit()
        finally:
            mdbsession.close()
Exemplo n.º 12
0
    def move(self, dest):
        """Move this submit directory to dest.

        Updates the master db record, the ensemble record (if any) and the
        workflow db to point at the new location, moves the files, rewrites
        the braindump file, and finally updates ``self.submitdir``. If
        ``dest`` is an existing directory, the submit dir is moved inside it
        under its current base name. Database sessions are closed even when a
        step fails.

        :param dest: destination path; made absolute before use

        Raises:
            SubmitDirException: if ``dest`` is a file, if ``dest`` is itself a
                submit dir, or if this submit dir belongs to a subworkflow.
        """
        dest = os.path.abspath(dest)

        if os.path.isfile(dest):
            raise SubmitDirException("Destination is a file: %s" % dest)

        if os.path.isdir(dest):
            if os.path.exists(os.path.join(dest, "braindump.txt")):
                raise SubmitDirException("Destination is a submit dir: %s" % dest)
            dest = os.path.join(dest, os.path.basename(self.submitdir))

        # Verify that we aren't trying to move a subworkflow
        if self.is_subworkflow():
            raise SubmitDirException(
                "Subworkflows cannot be moved independent of the root workflow"
            )

        # Connect to master database
        mdbsession = connection.connect_by_submitdir(
            self.submitdir, connection.DBType.MASTER
        )
        try:
            mdb = MasterDatabase(mdbsession)

            # Get the workflow record from the master db
            db_url = None
            wf = mdb.get_master_workflow(self.wf_uuid)
            if wf is None:
                db_url = connection.url_by_submitdir(
                    self.submitdir, connection.DBType.WORKFLOW
                )
            else:
                # We found an mdb record, so we need to update it

                # Save the master db's pointer
                db_url = wf.db_url

                # Update the master db's db_url
                # Note that this will only update the URL if it is an sqlite file
                # located in the submitdir
                log.info("Old master db_url: %s", wf.db_url)
                wf.db_url = db_url.replace(self.submitdir, dest)
                log.info("New master db_url: %s", wf.db_url)

                # Change the master db's submit_dir
                log.info("Old master submit_dir: %s", wf.submit_dir)
                wf.submit_dir = dest
                log.info("New master submit_dir: %s", wf.submit_dir)

            # Update the ensemble record if one exists
            ew = mdb.get_ensemble_workflow(self.wf_uuid)
            if ew is not None:
                log.info("Old ensemble submit dir: %s", ew.submitdir)
                ew.submitdir = dest
                log.info("New ensemble submit dir: %s", ew.submitdir)

            # Update the workflow database if we found one
            if db_url is not None:
                dbsession = connection.connect(db_url)
                try:
                    db = WorkflowDatabase(dbsession)
                    root_wf = db.get_workflow(self.wf_uuid)
                    db.update_submit_dirs(root_wf.wf_id, self.submitdir, dest)
                    dbsession.commit()
                finally:
                    dbsession.close()

            # Move all the files
            shutil.move(self.submitdir, dest)

            # Set new paths in the braindump file
            self.braindump["submit_dir"] = dest
            self.braindump["basedir"] = os.path.dirname(dest)
            utils.write_braindump(os.path.join(dest, "braindump.txt"), self.braindump)

            # Note that we do not need to update the properties file even though it
            # might contain DB URLs because it cannot contain a DB URL with the submit
            # dir in it.

            # TODO We might want to update all of the absolute paths in the condor submit files
            # if we plan on moving workflows that could be resubmitted in the future

            # TODO We might want to update the braindump files for subworkflows

            # Update master database
            mdbsession.commit()
        finally:
            # Release the master session even when a step above fails
            mdbsession.close()

        # Finally, update object
        self.submitdir = dest
Exemplo n.º 13
0
def all_workflows_db(db,
                     update=True,
                     pegasus_version=None,
                     schema_check=True,
                     force=False):
    """
    Update/Downgrade all completed workflow databases listed in master_workflow table.

    Workflows whose reported state is not ``WORKFLOW_TERMINATED`` are treated
    as active: they are skipped and listed as ``[ACTIVE]`` in the ``.err``
    log. Progress and a summary are printed to stdout; per-database results
    go to ``<timestamp>-dbadmin.out`` and ``<timestamp>-dbadmin.err`` in the
    current directory.

    :param db: DB session object
    :param update: update the databases when true, downgrade them otherwise
    :param pegasus_version: version of the Pegasus software (e.g., 4.6.0)
    :param schema_check: whether a sanity check of the schema should be performed
    :param force: whether operations should be performed despite conflicts
    """
    # log files
    file_prefix = "%s-dbadmin" % time.strftime("%Y%m%dT%H%M%S")

    if update:
        msg = ["updating", "Updated"]
    else:
        msg = ["downgrading", "Downgraded"]

    # The with block closes both log files even if a query or write fails.
    with open("%s.out" % file_prefix, "w") as f_out, \
            open("%s.err" % file_prefix, "w") as f_err:
        data = (db.query(
            MasterWorkflow.db_url,
            MasterWorkflowstate.state,
            func.max(MasterWorkflowstate.timestamp),
        ).join(MasterWorkflowstate).group_by(MasterWorkflow.wf_id).all())

        db_urls = []
        for d in data:
            if d[1] == "WORKFLOW_TERMINATED":
                db_urls.append(d[0])
            else:
                # Only workflows that are NOT terminated are active; they
                # are the ones counted as "running" in the summary below.
                f_err.write("[ACTIVE] %s\n" % d[0])

        counts = {
            "total": len(data),
            "running": len(data) - len(db_urls),
            "success": 0,
            "failed": 0,
            "unable_to_connect": 0,
        }

        print("")
        print("Verifying and %s workflow databases:" % msg[0])
        # Skipped (active) workflows count as already handled in the progress
        i = counts["running"]
        for dburi in db_urls:
            log.debug("{} '{}'...".format(msg[0], dburi))
            i += 1
            sys.stdout.write("\r%d/%d" % (i, counts["total"]))
            sys.stdout.flush()
            try:
                if update:
                    con = connection.connect(
                        dburi,
                        pegasus_version=pegasus_version,
                        schema_check=schema_check,
                        create=True,
                        force=force,
                        verbose=False,
                    )
                else:
                    con = connection.connect(dburi,
                                             schema_check=schema_check,
                                             create=False,
                                             verbose=False)
                try:
                    if not update:
                        metadata.clear()
                        warnings.simplefilter("ignore")
                        metadata.reflect(bind=con.get_bind())
                        db_downgrade(con,
                                     pegasus_version=pegasus_version,
                                     force=force,
                                     verbose=False)
                finally:
                    # Close the connection even if the downgrade fails
                    con.close()
                f_out.write("[SUCCESS] %s\n" % dburi)
                counts["success"] += 1
            except connection.ConnectionError as e:
                if "unable to open database file" in str(e):
                    f_err.write("[UNABLE TO CONNECT] %s\n" % dburi)
                    counts["unable_to_connect"] += 1
                else:
                    f_err.write("[ERROR] %s\n" % dburi)
                    counts["failed"] += 1
                log.debug(e)
            except Exception as e:
                # Best effort: record the failure and continue with the rest
                f_err.write("[ERROR] %s\n" % dburi)
                counts["failed"] += 1
                log.debug(e)

    print("\n\nSummary:")
    print("  Verified/{}: {}/{}".format(msg[1], counts["success"],
                                        counts["total"]))
    print("  Failed: {}/{}".format(counts["failed"], counts["total"]))
    print("  Unable to connect: {}/{}".format(counts["unable_to_connect"],
                                              counts["total"]))
    print("  Unable to update (active workflows): %s/%s" %
          (counts["running"], counts["total"]))
    print("\nLog files:")
    print("  %s.out (Succeeded operations)" % file_prefix)
    print("  %s.err (Failed operations)" % file_prefix)