Exemplo n.º 1
0
def db_check(mod_akrr=True, mod_appkernel=True, modw=True):
    """
    Run the privilege checks for the selected databases and report the result.

    Each flag switches on the check of the database of the same name:
    'mod_akrr' and 'mod_appkernel' are checked with check_rw_db, 'modw' is
    checked with check_r_db. Every enabled check is executed, even when an
    earlier one has already failed.

    Returns True if all enabled checks passed, False otherwise.
    """
    outcomes = []

    if mod_akrr:
        outcomes.append(check_rw_db(
            akrr.db.get_akrr_db,
            "Checking 'mod_akrr' Database / User privileges...",
            "'mod_akrr' Database check complete - Status: %s"))

    if mod_appkernel:
        outcomes.append(check_rw_db(
            akrr.db.get_ak_db,
            "Checking 'mod_appkernel' Database / User privileges...",
            "'mod_appkernel' Database check complete - Status: %s"))

    if modw:
        outcomes.append(check_r_db(
            akrr.db.get_xd_db,
            "Checking 'modw' Database / User privileges...",
            "'modw' Database check complete - Status: %s"))

    # A single failed check is enough to fail the whole verification.
    if not all(outcomes):
        log.error(
            "One or more of the required databases and their required users ran into a problem. "
            "Please take note of the previous messages, correct the issue and re-run this script."
        )
        return False

    log.info("All Databases / User privileges check out!")
    return True
Exemplo n.º 2
0
Arquivo: ssh.py Projeto: jtpalmer/akrr
def check_dir(sh, d, exit_on_fail=False, try_to_create=True, raise_on_fail=False):
    """
    Check that directory ``d`` exists on the remote side and is accessible.

    The state comes from check_dir_simple(sh, d). If the directory is reported
    as missing (status None) and ``try_to_create`` is True, ``mkdir -p`` is
    executed through ``sh`` and the check is repeated; in dry-run mode
    (akrr.dry_run) nothing is created and success is assumed.

    With ``exit_on_fail`` False the (status, message) pair is returned as is:
        None, message  - directory does not exist
        True, message  - can write there
        False, message - can not write there

    With ``exit_on_fail`` True only success returns, as ``(True, message)``.
    On failure AkrrError is raised if ``raise_on_fail`` is set, otherwise the
    process is terminated with exit status 1.
    """
    status, msg = check_dir_simple(sh, d)
    if try_to_create is True and status is None:
        log.info("Directory %s:%s does not exists, will try to create it", sh.remote_machine, d)
        if not akrr.dry_run:
            cmd = "mkdir -p \"%s\"" % (d,)
            ssh_command(sh, cmd)
            status, msg = check_dir_simple(sh, d)
        else:
            status, msg = (True, "Directory exist and accessible for read/write")
    if exit_on_fail is False:
        return status, msg

    if status is True:
        return True, msg

    if status is None:
        problem = "Directory %s:%s does not exists!"
    else:
        problem = "Directory %s:%s is NOT accessible for read/write!"

    log.error(problem, sh.remote_machine, d)
    if raise_on_fail:
        raise AkrrError(problem % (sh.remote_machine, d))
    # Exit with a non-zero status: a bare exit() reports success (0) to the
    # calling shell even though the check has failed.
    exit(1)
Exemplo n.º 3
0
def daemon_stop(pid: int = None, timeout: float = 120.0):
    """
    Stop AKRR server. Return True on success or False on timeout.

    Sends SIGTERM to process ``pid`` and then polls every 0.5 seconds until
    psutil no longer reports the PID. ``pid`` equal to None means that there
    is nothing to stop and True is returned right away. If the process is
    still present after ``timeout`` seconds, False is returned.
    """
    import os
    import time
    import psutil
    import signal

    if pid is None:
        log.info("AKRR is already not running.")
        return True
    log.info("Sending termination signal to AKRR server (PID: " + str(pid) +
             ")")
    # send a signal to terminate
    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        # The process has exited after the caller obtained its PID; there is
        # nothing left to signal, the loop below will confirm it is gone.
        pass

    # wait till the process is finished; the monotonic clock keeps the
    # timeout unaffected by system clock adjustments
    start = time.monotonic()
    while psutil.pid_exists(pid):
        time.sleep(0.5)
        if time.monotonic() - start > timeout:
            log.error("Can not stopped daemon!")
            return False

    log.info("Stopped AKRR server (PID: " + str(pid) + ")")
    return True
Exemplo n.º 4
0
    def _set_env(self):
        """
        Source the environment setup script and remember the resulting token.

        On the first call the script location is resolved and cached in
        self._which_env_set_script: self._env_set_script is used directly when
        it is a local file, otherwise the output of ``which`` (run through
        self.run_cmd) is used. The script is then sourced with self.run_cmd,
        repeating every 30 seconds while the output reports
        "HTTP Client Error (HTTP 429)". Finally the value echoed for $OS_TOKEN
        is stored in self._token.

        Raises FileNotFoundError if the resolved script is not a local file.
        """
        # resolve the script location once
        if self._which_env_set_script is None:
            script = self._env_set_script
            if os.path.isfile(script):
                located = script
            else:
                located = self.run_cmd("which " + script).strip()
                if located.endswith(script):
                    located = os.path.expanduser(located)
            self._which_env_set_script = located
            log.debug("which_openstack_env_set_script: " + self._which_env_set_script)

        # the script must be present locally
        if not os.path.isfile(self._which_env_set_script):
            msg = "\n".join((
                "Can not find openstack environment setup script: " + self._env_set_script,
                self._which_env_set_script))
            log.error(msg)
            raise FileNotFoundError(msg)

        # source the script, waiting whenever the output reports HTTP 429
        throttled = "HTTP Client Error (HTTP 429)"
        while throttled in self.run_cmd("source " + self._which_env_set_script):
            time.sleep(30)

        self._token = self.run_cmd("echo $OS_TOKEN").strip()
Exemplo n.º 5
0
def load_app(app_name: str,
             resources: Dict,
             app_cfg_filename: str = None,
             validate=True) -> Dict:
    """
    Load app configuration, do minimalistic validation.

    Starts from load_app_default(app_name). Then, for every entry of
    <cfg_dir>/resources (except 'notactive' and 'templates') which contains
    <app_name>.app.conf, the result of load_app_on_resource is stored in
    app['appkernel_on_resource'][<resource name>]. The assembled dict is
    passed through verify_app_params before it is returned.

    ``app_cfg_filename`` and ``validate`` are accepted but are not used by
    this function.

    Returns dict with app parameters.
    Raises AkrrError (chained to the original exception) if a resource
    specific configuration can not be loaded.
    """
    log.debug("Loading app %s", app_name)
    app = load_app_default(app_name)

    # load resource specific parameters
    resources_cfg_dir = os.path.join(cfg_dir, "resources")
    for resource_name in os.listdir(resources_cfg_dir):
        if resource_name in ['notactive', 'templates']:
            continue
        app_on_resource_cfg_filename = os.path.join(
            resources_cfg_dir, resource_name, app_name + ".app.conf")
        if not os.path.isfile(app_on_resource_cfg_filename):
            continue
        try:
            app['appkernel_on_resource'][resource_name] = load_app_on_resource(
                app_name, resource_name, resources[resource_name], app)
        except Exception as e:
            # NOTE(review): the message says the configuration is skipped,
            # but loading is aborted by the raise below - confirm the intent.
            log.error(
                "Exception occurred during app kernel configuration loading for %s from %s. "
                "Will skip it for now.", app_name, app_on_resource_cfg_filename)
            # keep the original exception as the cause for easier debugging
            raise AkrrError("Can not load app configuration for %s." %
                            app_name) from e
    app = verify_app_params(app, app)
    return app
Exemplo n.º 6
0
    def _set_openstack_env(self):
        """
        Source the OpenStack environment script and store the resulting token.

        On the first call the script location is resolved and cached in
        self._which_openstack_env_set_script: the configured
        self._openstack_env_set_script is used directly when it is a local
        file, otherwise the output of ``which`` (run through
        self._shell.run_command) is used. The script is then sourced with
        self.run_cmd and the value echoed for $OS_TOKEN is kept in
        self._token.

        Raises FileNotFoundError if the resolved script is not a local file.
        """
        # resolve the script location once
        if self._which_openstack_env_set_script is None:
            configured = self._openstack_env_set_script
            if os.path.isfile(configured):
                resolved = configured
            else:
                resolved = self._shell.run_command("which " + configured).strip()
                if resolved.endswith(configured):
                    resolved = os.path.expanduser(resolved)
            self._which_openstack_env_set_script = resolved
            log.debug("which_openstack_env_set_script: " +
                      self._which_openstack_env_set_script)

        # the script must be present locally
        if not os.path.isfile(self._which_openstack_env_set_script):
            msg = "\n".join((
                "Can not find openstack environment setup script: " + self._openstack_env_set_script,
                self._which_openstack_env_set_script))
            log.error(msg)
            raise FileNotFoundError(msg)

        # set environment and pick up the token it exports
        self.run_cmd("source " + self._which_openstack_env_set_script)
        token_output = self.run_cmd("echo $OS_TOKEN")
        self._token = token_output.strip()
Exemplo n.º 7
0
def connect_to_resource(resource):
    """
    Connect to resource defined in resource dictionary.

    ``resource`` must provide the 'name' and 'ssh_private_key_file' keys. The
    connection is opened with akrr.util.ssh.ssh_resource(resource) while
    sys.stdout and sys.stderr are captured, so that the captured output can
    be shown if the connection fails.

    Returns the object returned by akrr.util.ssh.ssh_resource.
    Terminates the process with exit status 1 if the private key file is set
    but is not a file, or if AkrrError is raised while connecting.
    """
    log.info("Validating resource accessibility. Connecting to %s.",
             resource['name'])
    if resource['ssh_private_key_file'] is not None and os.path.isfile(
            resource['ssh_private_key_file']) is False:
        log.error("Can not access ssh private key (%s)"
                  "", resource['ssh_private_key_file'])
        exit(1)

    str_io = io.StringIO()
    try:
        sys.stdout = sys.stderr = str_io
        try:
            rsh = akrr.util.ssh.ssh_resource(resource)
        finally:
            # Always undo the redirection, also for exceptions other than
            # AkrrError, otherwise all later output would be lost in str_io.
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
    except AkrrError:
        log.critical("Can not connect to %s\nMessage:\n%s", resource['name'],
                     str_io.getvalue())
        exit(1)

    log.info("Successfully connected to %s\n", resource['name'])
    log.empty_line()

    return rsh
Exemplo n.º 8
0
 def init_dir():
     """
     Make directories for configuration and logging.

     Creates, when missing, _akrr_home and its 'etc', 'etc/resources', 'log',
     'log/data', 'log/comptasks' and 'log/akrrd' sub-directories. Any failure
     is logged and terminates the process with exit status 1.
     """
     # parents are listed before their children
     sub_dirs = (
         (),
         ('etc',),
         ('etc', 'resources'),
         ('log',),
         ('log', 'data'),
         ('log', 'comptasks'),
         ('log', 'akrrd'),
     )
     try:
         log.info("Creating directories structure.")
         for parts in sub_dirs:
             path = os.path.join(_akrr_home, *parts)
             if not os.path.isdir(path):
                 make_dirs(path)
     except Exception as e:
         log.error("Can not create directories: " + str(e))
         exit(1)
Exemplo n.º 9
0
def _read_username_password(prompt="Enter username:"******"user",
                            password_on_default_user=None):
    # NOTE(review): part of the signature above is masked ("******") in this
    # copy of the source, so the parameters between `prompt` and
    # `password_on_default_user` are not visible here. The body below uses
    # `username`, `password` and `default_username` - restore the signature
    # from the original file before relying on this function.
    if username is None:
        log.log_input(prompt)

    # ask for the user name only if it was not supplied; an empty answer
    # selects default_username
    if username is None:
        username = input('[{0}]: '.format(default_username))
        if username == '':
            username = default_username
    else:
        log.info("User, " + username + ", already entered.")

    # the default user may come with a predefined password
    if username == default_username and password is None and password_on_default_user is not None:
        password = password_on_default_user

    if password is None:
        while True:
            # NOTE(review): the next statement is masked ("******") as well;
            # the visible messages suggest that the password is asked twice
            # and compared - confirm against the original file.
            log.log_input("Please specify a password:"******"Please reenter the password:"******"Entered passwords do not match. Please try again.")
    else:
        log.info("Password already entered.")
    # the result is the (username, password) pair
    return username, password
Exemplo n.º 10
0
def get_system_characteristics():
    """
    Ask the user for the processors (cores) per node count.

    The question is repeated until the answer can be converted with int();
    the value is stored in the module level variable ``ppn``.
    """
    global ppn
    while True:
        try:
            log.log_input("Enter processors (cores) per node count:")
            cores_per_node = int(input(""))
        except (ValueError, TypeError):
            log.error("Incorrect entry, try again.")
            continue
        ppn = cores_per_node
        return
Exemplo n.º 11
0
def check_shell(rsh, resource):
    """
    Verify that the shell reached through ``rsh`` is BASH.

    Runs ``echo $BASH`` with akrr.util.ssh.ssh_command and looks for "bash"
    in the output. If it is absent an error naming resource['name'] is
    logged and the process is terminated with exit status 1.
    """
    log.info("Checking if shell is BASH\n")
    reply = akrr.util.ssh.ssh_command(rsh, "echo $BASH")
    if "bash" in reply:
        log.info("Shell is BASH\n")
        return

    log.error(
        "Shell on headnode of %s is not BASH, change it to bash and try again.\n",
        resource['name'])
    exit(1)
Exemplo n.º 12
0
 def is_api_up():
     """
     Tell whether the REST API answers '/scheduled_tasks' with status 200.

     Any other status code is logged together with the response text and
     False is returned.
     """
     from akrr import akrrrestclient

     response = akrrrestclient.get("/scheduled_tasks")
     if response.status_code != 200:
         log.error(
             'Unable to successfully contact the REST API: %s: %s',
             response.status_code, response.text)
         return False
     return True
Exemplo n.º 13
0
    def run(self):
        """
        Parse the command line and call the handler of the selected command.

        The handler is the ``func`` attribute of the parsed arguments; when
        it is absent an error is logged instead.
        """
        log.info("AKRR Regression Tests")
        parsed = self.root_parser.parse_args()

        self.process_common_args(parsed)

        if not hasattr(parsed, "func"):
            log.error("There is no command specified!")
            return
        parsed.func(parsed)
Exemplo n.º 14
0
    def archive_tasks_by_months(self,
                                months_old: int,
                                resources=None,
                                appkernels=None) -> None:
        """
        Archive old tasks by months.

        Walks over the directories returned by
        self.get_tasks_month_dir_list(resources, appkernels). The month is
        taken from the directory name and the year from the name of its
        parent directory. For every directory which is at least
        ``months_old`` months old:
          * every ``*.tar.gz`` found inside is extracted in place and removed,
          * the whole directory is packed into ``<directory>.tar.gz`` and
            removed.

        ``resources`` and ``appkernels`` are passed through
        get_list_from_comma_sep_values before use. Failures of a single
        archive or a single month are logged and the processing goes on.
        """
        log.info("Archiving tasks by months")

        resources = get_list_from_comma_sep_values(resources)
        appkernels = get_list_from_comma_sep_values(appkernels)
        time_now = datetime.datetime.now()
        # months counted from year 0, makes the age a simple difference
        mega_month_now = time_now.month + time_now.year * 12
        count = 0
        task_month_dirs = self.get_tasks_month_dir_list(resources, appkernels)
        n_task_month_dirs = len(task_month_dirs)
        # refresh the progress bar about 50 times over the whole run
        progress_update = max(int(round(n_task_month_dirs / 50)), 1)
        for i, task_month_dir in enumerate(task_month_dirs):
            if i % progress_update == 0:
                progress_bar(i / n_task_month_dirs)
            month = os.path.basename(task_month_dir)
            year = os.path.basename(os.path.dirname(task_month_dir))
            mega_month = int(month) + int(year) * 12

            if mega_month_now - mega_month < months_old:
                continue

            # unzip archives
            for archive in os.listdir(task_month_dir):
                if not archive.endswith(".tar.gz"):
                    continue
                archive_path = os.path.join(task_month_dir, archive)
                try:
                    # the context manager closes the archive even when the
                    # extraction fails
                    with tarfile.open(archive_path, "r|gz") as targz:
                        targz.extractall(task_month_dir)
                    os.remove(archive_path)
                except Exception as e:
                    log.error("Can not extract %s:\n %s", archive_path, str(e))
            # zip month
            try:
                with tarfile.open(task_month_dir + '.tar.gz', "w|gz") as targz:
                    targz.add(task_month_dir, month)
                # the directory is removed only after the archive was
                # written and closed without errors
                shutil.rmtree(task_month_dir)
                count += 1
            except Exception as e:
                log.error("Can not archive %s\n %s", task_month_dir, str(e))
        progress_bar()
        log.info("Archived %d task months" % count)
Exemplo n.º 15
0
    def run(self, args=None):
        """
        Parse arguments and execute the requested command.

        ``args`` is handed to the root parser as is. The handler is the
        ``func`` attribute of the parsed arguments (provided with
        '.set_defaults(func=...)'); its result is returned. When there is no
        handler an error is logged and None is returned.
        """
        parsed = self.root_parser.parse_args(args=args)

        self.process_common_args(parsed)

        if not hasattr(parsed, "func"):
            log.error("There is no command specified!")
            return None

        return parsed.func(parsed)
Exemplo n.º 16
0
    def write_error_xml(self, filename, cdata=False):
        """
        Write an error report for this task to ``filename`` as XML.

        The report carries self.status as errorCause, self.appName as
        reporter and self.status_info as errorMsg. With ``cdata`` True the
        message is wrapped in a CDATA section from the start. Otherwise it
        is written as plain text first, and the CDATA form is used only if
        the plain document can not be parsed.

        The file is written even if the final document still can not be
        parsed; that situation is only logged.
        """
        import xml.etree.ElementTree

        plain_template = ("<body>\n"
                          "<xdtas>\n"
                          "  <batchJob>\n"
                          "   <status>Error</status>\n"
                          "   <errorCause>%s</errorCause>\n"
                          "   <reporter>%s</reporter>\n"
                          "   <errorMsg>%s</errorMsg>\n"
                          "  </batchJob>\n"
                          " </xdtas>\n"
                          "</body> \n")
        cdata_template = ("<body>\n"
                          " <xdtas>\n"
                          "  <batchJob>\n"
                          "   <status>Error</status>\n"
                          "   <errorCause>%s</errorCause>\n"
                          "   <reporter>%s</reporter>\n"
                          "   <errorMsg><![CDATA[%s]]></errorMsg>\n"
                          "  </batchJob>\n"
                          " </xdtas>\n"
                          "</body>\n")

        # "]]>" would terminate the CDATA section too early; split it over
        # two adjacent sections so that the message is preserved verbatim
        cdata_message = str(self.status_info).replace("]]>", "]]]]><![CDATA[>")
        cdata_content = cdata_template % (self.status, self.appName,
                                          cdata_message)

        if cdata:
            content = cdata_content
        else:
            content = plain_template % (self.status, self.appName,
                                        self.status_info)

        # now lets try to parse it back
        try:
            xml.etree.ElementTree.fromstring(content)
        except Exception as e:
            log.error(
                "Cannot write readable XML file (%s), will try CDATA declaration"
                % str(e))
            content = cdata_content
            try:
                xml.etree.ElementTree.fromstring(content)
            except Exception as e2:
                log.error("Cannot write readable XML file!!! %s" % str(e2))

        # the context manager closes the file also when the write fails
        with open(filename, "w") as fout:
            fout.write(content)
Exemplo n.º 17
0
def check_connection_to_rest_api():
    """
    Check that the AKRR REST API answers '/scheduled_tasks' with status 200.

    If the request itself raises, a critical message is logged and the
    exception is re-raised. If the server answers with another status code,
    the response body is logged and the process is terminated with exit
    status 1.
    """
    try:
        r = akrrrestclient.get('/scheduled_tasks')
    except Exception:
        log.critical(
            "Can not connect to AKRR REST API ( %s )\nIs it running?\nSee full error report below",
            akrrrestclient.restapi_host)
        raise

    if r.status_code == 200:
        return

    # The error body is not guaranteed to be JSON; decoding it outside of
    # the try block above keeps a decoding problem from being reported as a
    # connection failure.
    try:
        server_response = json.dumps(r.json(), indent=4)
    except (ValueError, TypeError):
        # presumably a requests-style response object, so the raw body is
        # available as .text - TODO confirm
        server_response = r.text
    log.error(
        "Can not get token for AKRR REST API ( %s )\nSee server response below\n%s",
        akrrrestclient.restapi_host, server_response)
    exit(1)
Exemplo n.º 18
0
def validate_resource_name(m_resource_name: str) -> bool:
    """
    Tell whether ``m_resource_name`` can be used for a new resource.

    The name is rejected (an error is logged and False is returned) when it
    is blank, when a path of that name already exists under resources_dir,
    when the ``resource`` table of the mod_appkernel DB has a row with that
    nickname, or when the ``resources`` table of the mod_akrr DB has a row
    with that name. Otherwise True is returned.
    """
    if not m_resource_name.strip():
        log.error("Bad name for resource, try a different name")
        return False

    # configuration directory must not exist yet
    candidate_dir = os.path.abspath(os.path.join(resources_dir, m_resource_name))
    if os.path.exists(candidate_dir):
        log.error(
            "Resource configuration directory (%s) for resource with name %s "
            "already present on file system, try a different name" % (candidate_dir, m_resource_name,))
        return False

    # no entry in mod_appkernel
    ak_connection, ak_cursor = akrr.db.get_ak_db(True)
    ak_cursor.execute('''SELECT * FROM resource WHERE nickname=%s''', (m_resource_name,))
    if ak_cursor.fetchall():
        log.error(
            "Resource with name %s already present in mod_appkernel DB, try a different name" % (m_resource_name,))
        return False

    # no entry in mod_akrr
    akrr_connection, akrr_cursor = akrr.db.get_akrr_db(True)
    akrr_cursor.execute('''SELECT * FROM resources WHERE name=%s''', (m_resource_name,))
    if akrr_cursor.fetchall():
        log.error("Resource with name %s already present in mod_akrr DB, try a different name" % (m_resource_name,))
        return False

    return True
Exemplo n.º 19
0
def reprocess_parsed(args):
    """
    Reprocess completed tasks selected by the parsed arguments.

    ``args.resource`` and ``args.appkernel`` are mandatory; without them an
    error is logged and the process is terminated with exit status 1. The
    selection (resource, appkernel, time_start, time_end, verbose) is handed
    to AkrrDaemon.reprocess_completed_tasks.
    """
    if not args.resource or not args.appkernel:
        log.error('Please provide a resource, app')
        exit(1)

    selection = (args.resource, args.appkernel, args.time_start,
                 args.time_end, args.verbose)

    from akrr import daemon
    scheduler = daemon.AkrrDaemon(adding_new_tasks=True)
    scheduler.reprocess_completed_tasks(*selection)
Exemplo n.º 20
0
    def read_old_akrr_conf_dir(self, old_akrr_conf_dir):
        """
        Load akrr.conf from ``old_akrr_conf_dir`` into self.old_akrr_conf.

        The file is read with akrr.util.exec_files_to_dict. If the directory
        or the akrr.conf file inside it is missing, an error is logged and
        the process is terminated with exit status 1.
        """
        if os.path.isdir(old_akrr_conf_dir) is False:
            log.error("Directory with old AKRR configuration do not exist: " + old_akrr_conf_dir)
            exit(1)

        conf_file = os.path.join(old_akrr_conf_dir, "akrr.conf")
        if os.path.isfile(conf_file) is False:
            log.error("File with old AKRR configuration do not exist: " + conf_file)
            exit(1)

        from akrr.util import exec_files_to_dict

        log.info("Reading old AKRR configuration from: " + conf_file)
        self.old_akrr_conf = exec_files_to_dict(conf_file)
Exemplo n.º 21
0
    def check_utils():
        """
        Check that ssh and openssl are already installed.

        Both executables are looked up in PATH. If any of them is missing,
        one error line per missing tool is logged and the process is
        terminated with exit status 1.
        """
        # shutil.which replaces distutils.spawn.find_executable: distutils is
        # no longer part of the standard library since Python 3.12
        import shutil

        missing = [tool for tool in ('ssh', 'openssl')
                   if shutil.which(tool) is None]

        if missing:
            log.error("".join(
                "Can not find %s in PATH, please install it.\n" % tool
                for tool in missing))
            exit(1)
Exemplo n.º 22
0
def check_appsig(rsh, resource):
    """
    Check the app.signature calculator through ``rsh``.

    Runs <appkernel_dir>/execs/bin/appsigcheck.sh on the md5sum executable
    and expects both "===ExeBinSignature===" and "MD5:" in the output. In
    dry-run mode (akrr.dry_run) a failure is only reported, otherwise it is
    logged with the full output and the process is terminated with exit
    status 1.
    """
    log.info("Testing app.signature calculator on headnode\n")
    command = "%s/execs/bin/appsigcheck.sh `which md5sum`" % (resource['appkernel_dir'], )
    out = akrr.util.ssh.ssh_command(rsh, command)

    if "===ExeBinSignature===" in out and "MD5:" in out:
        log.info("App.signature calculator is working on headnode\n")
        return

    if akrr.dry_run:
        log.dry_run("App.signature calculator is not working\n")
        return

    log.error(
        "App.signature calculator is not working\n"
        "See full error report below\n%s", out)
    exit(1)
Exemplo n.º 23
0
def validate_resource_parameter_file(resource_name):
    """
    Validate resource parameter file and return dictionary with resource configuration.

    The file <cfg.cfg_dir>/resources/<resource_name>/resource.conf must
    exist. Its syntax is checked by executing default.resource.conf and then
    the resource file itself in a scratch namespace. Afterwards the resource
    is loaded with cfg.find_resource_by_name and the result is returned.

    Terminates the process with exit status 1 if the file is missing or the
    resource can not be loaded. Exceptions raised while the files are
    executed are logged and re-raised.
    """
    # @todo reuse  cfg.verify_resource_params
    default_resource_param_filename = os.path.join(cfg.akrr_mod_dir,
                                                   "default_conf",
                                                   "default.resource.conf")
    resource_param_filename = os.path.join(cfg.cfg_dir, "resources",
                                           resource_name, "resource.conf")

    log.info("Validating %s parameters from %s", resource_name,
             resource_param_filename)

    if not os.path.isfile(resource_param_filename):
        log.error("resource parameters file (%s) does not exist!",
                  resource_param_filename)
        exit(1)

    # check syntax
    try:
        tmp = {}
        # defaults first, so that the resource file can refer to them
        for filename in (default_resource_param_filename,
                         resource_param_filename):
            # the context manager closes the file right after reading
            with open(filename) as fin:
                source = fin.read()
            # NOTE: the configuration file is executed as Python code, it
            # must come from a trusted location
            exec(compile(source, filename, 'exec'), tmp)
    except Exception:
        log.critical(
            "Can not load resource from %s.\nProbably invalid syntax.",
            resource_param_filename)
        raise

    try:
        resource = cfg.find_resource_by_name(resource_name)
    except Exception as e:
        log.error("Can not load resource config from %s!\n%s\n%s",
                  resource_param_filename, str(e), traceback.format_exc())
        exit(1)

    log.info(
        "Syntax of %s is correct and all necessary parameters are present.",
        resource_param_filename)
    log.empty_line()
    return resource
Exemplo n.º 24
0
    def init_mysql_dbs(self):
        """
        Create AKRR databases and access users, set the users access rights.

        A database is handled only if its super user name is set
        (self.*_db_su_user_name is not None): mod_akrr always, and the
        mod_appkernel and modw ones only when not self.stand_alone. The first
        two are created if missing and get "ALL" privileges, the last one
        gets only "SELECT" and is not created.

        Any exception is printed with its traceback, logged, and terminates
        the process with exit status 1.
        """
        def _provision(con_fun, user, password, db, priv, create):
            """
            Create ``db`` (when ``create``), make sure ``user`` exists and
            grant ``priv`` on ``db`` to it, using the connection from con_fun.
            """
            announcement = "Creating %s and user to access it" if create else "Setting user to access %s"
            log.info(announcement % (db,))

            su_con, su_cur = con_fun(True, None)
            client_host = get_db_client_host(su_cur)

            if create:
                _cursor_execute(su_cur, "CREATE DATABASE IF NOT EXISTS %s" % (cv(db),))

            create_user_if_not_exists(su_cur, user, password, client_host, dry_run=dry_run)
            grant_statement = "GRANT " + cv(priv) + " ON " + cv(db) + ".* TO %s@%s"
            _cursor_execute(su_cur, grant_statement, (user, client_host))

            su_con.commit()

        try:
            # During self.read_db_creds db and user was checked and
            # if they do not exist or not good enough super user credentials
            # was asked so if they not None that means that
            # either user or db or user priv needed to be set
            if self.akrr_db_su_user_name is not None:
                _provision(
                    self.get_akrr_db, self.akrr_db_user_name, self.akrr_db_user_password, self.akrr_db_name,
                    "ALL", True)

            if self.stand_alone:
                return

            if self.ak_db_su_user_name is not None:
                _provision(
                    self.get_ak_db, self.ak_db_user_name, self.ak_db_user_password, self.ak_db_name,
                    "ALL", True)
            if self.xd_db_su_user_name is not None:
                _provision(
                    self.get_xd_db, self.xd_db_user_name, self.xd_db_user_password, self.xd_db_name,
                    "SELECT", False)
        except Exception as e:
            import traceback
            traceback.print_exc()
            log.error("Can not execute the sql setup script: " + str(e))
            exit(1)
Exemplo n.º 25
0
def _read_sql_su_credentials(host, port):
    # Ask for administrative database credentials for host:port until a
    # connection made with them through get_con_to_db succeeds; the accepted
    # (su_username, su_password) pair is returned.
    while True:
        log.log_input(
            "Please provide an administrative database user (for {}:{}) "
            "under which the installation sql script should "
            "run (This user must have privileges to create "
            "users and databases).".format(host, port))
        # NOTE(review): the next line is partly masked ("******") in this
        # copy of the source; restore it from the original file.
        su_username = input("Username: "******"Please provide the password for the the user which you previously entered:")
        su_password = getpass.getpass()

        try:
            # the connection is opened only to validate the credentials
            get_con_to_db(su_username, su_password, host, port)
            return su_username, su_password
        except Exception as e:
            log.error("MySQL error: " + str(e))
            log.error("Entered credential is not valid. Please try again.")
Exemplo n.º 26
0
    def check_previous_installation(self):
        """
        Refuse a fresh installation on top of an existing one.

        Nothing happens when _akrr_cfg does not exist or when self.update is
        set. Otherwise manual uninstall instructions are logged as an error
        and the process is terminated with exit status 1.
        """
        if not os.path.exists(_akrr_cfg) or self.update:
            return

        msg = "".join((
            "This is a fresh installation script. ", _akrr_home,
            " contains previous AKRR installation. Either uninstall it or see documentation on updates.\n\n",
            "To uninstall AKRR manually:\n\t1)remove ", _akrr_cfg, "\n\t\trm ", _akrr_cfg, "\n",
            "\t2) (optionally for totally fresh start) drop mod_akrr and mod_appkernel database\n",
            "\t\tDROP DATABASE mod_appkernel;\n",
            "\t\tDROP DATABASE mod_akrr;\n\n",
        ))

        log.error(msg)
        exit(1)
Exemplo n.º 27
0
    def remove_tasks_state_dumps(self,
                                 days_old: int,
                                 resources=None,
                                 appkernels=None) -> None:
        """
        Remove tasks state dumps.

        For every task directory returned by
        self.get_tasks_dir_list(resources, appkernels) which is at least
        ``days_old`` days old (the age is derived from the directory name
        with time_stamp_to_datetime), the files of its "proc" sub-directory
        matching _state_dump are deleted. With self.dry_run set the files
        are only counted.

        ``resources`` and ``appkernels`` are passed through
        get_list_from_comma_sep_values before use. A failure in one task
        directory is logged and the processing goes on.
        """

        resources = get_list_from_comma_sep_values(resources)
        appkernels = get_list_from_comma_sep_values(appkernels)

        log.info("Removing tasks state dumps")
        log.debug("resources filter: " + str(resources))
        log.debug("appkernels filter: " + str(appkernels))
        log.debug("days: " + str(days_old))
        log.debug("dry_run: " + str(self.dry_run))
        log.debug("comp_task_dir: " + str(self.comp_task_dir))

        timenow = datetime.datetime.now()
        seconds_in_day = 24 * 3600
        count = 0
        for task_dir in self.get_tasks_dir_list(resources, appkernels):
            try:
                time_stamp = os.path.basename(task_dir)
                activate_time = time_stamp_to_datetime(time_stamp)
                days_passed = (timenow -
                               activate_time).total_seconds() / seconds_in_day
                if days_passed < days_old:
                    continue

                proc_dir = os.path.join(task_dir, "proc")
                if not os.path.isdir(proc_dir):
                    continue

                for state_file in os.listdir(proc_dir):
                    if _state_dump.match(state_file) is None:
                        continue
                    # the file name needs a placeholder to show up in the
                    # message (presumably log.debug2 formats like the other
                    # log calls - TODO confirm)
                    log.debug2("    delete: %s", state_file)
                    state_file_fullpath = os.path.join(proc_dir, state_file)
                    count += 1
                    if not self.dry_run:
                        os.remove(state_file_fullpath)
            except Exception:
                # not a bare except: Ctrl-C and SystemExit must still be
                # able to stop a long clean-up
                log.error("Cannot process: " + task_dir)
        log.info("Removed %d task state dumps" % count)
Exemplo n.º 28
0
    def archive_tasks(self,
                      days_old: int,
                      resources=None,
                      appkernels=None) -> None:
        """
        Archive old tasks.

        Every task directory returned by
        self.get_tasks_dir_list(resources, appkernels) which is at least
        ``days_old`` days old (the age is derived from the directory name
        with time_stamp_to_datetime) is packed into ``<directory>.tar.gz``
        and removed. With self.dry_run set the directories are only counted
        and nothing is changed on disk.

        ``resources`` and ``appkernels`` are passed through
        get_list_from_comma_sep_values before use. A failure in one task
        directory is logged and the processing goes on.
        """
        resources = get_list_from_comma_sep_values(resources)
        appkernels = get_list_from_comma_sep_values(appkernels)

        log.info("Archiving tasks")
        log.debug("resources filter: " + str(resources))
        log.debug("appkernels filter: " + str(appkernels))
        log.debug("days: " + str(days_old))
        log.debug("dry_run: " + str(self.dry_run))
        log.debug("comp_task_dir: " + str(self.comp_task_dir))

        time_now = datetime.datetime.now()
        seconds_in_day = 24 * 3600
        count = 0
        task_dirs = self.get_tasks_dir_list(resources, appkernels)
        n_task_dirs = len(task_dirs)
        # refresh the progress bar about 50 times over the whole run
        progress_update = max(int(round(n_task_dirs / 50)), 1)
        for i, task_dir in enumerate(task_dirs):
            if i % progress_update == 0:
                progress_bar(i / n_task_dirs)
            try:
                time_stamp = os.path.basename(task_dir)
                activate_time = time_stamp_to_datetime(time_stamp)
                days_passed = (time_now -
                               activate_time).total_seconds() / seconds_in_day
                if days_passed < days_old:
                    continue

                # honour dry_run like the other clean-up methods of this
                # section do: count, but leave the disk untouched
                if not self.dry_run:
                    # the context manager closes the archive even when
                    # adding fails
                    with tarfile.open(task_dir + '.tar.gz', mode='w|gz') as out:
                        out.add(task_dir, time_stamp)
                    # the directory is removed only after the archive was
                    # written and closed without errors
                    shutil.rmtree(task_dir)
                count += 1
            except Exception:
                # not a bare except: Ctrl-C and SystemExit must still be
                # able to stop a long archiving run
                log.error("Cannot process: " + task_dir)
        progress_bar()
        log.info("Archived %d tasks" % count)
Exemplo n.º 29
0
    def remove_tasks_workdir(self,
                             days_old: int,
                             resources=None,
                             appkernels=None) -> None:
        """
        Remove tasks working directories.

        For every task directory returned by
        self.get_tasks_dir_list(resources, appkernels) which is at least
        ``days_old`` days old (the age is derived from the directory name
        with time_stamp_to_datetime), its "jobfiles/workdir" sub-directory is
        deleted. With self.dry_run set the directories are only counted.

        ``resources`` and ``appkernels`` are passed through
        get_list_from_comma_sep_values before use. A failure in one task
        directory is logged and the processing goes on.
        """

        resources = get_list_from_comma_sep_values(resources)
        appkernels = get_list_from_comma_sep_values(appkernels)

        log.info("Removing tasks workdir")
        log.debug("resources filter: " + str(resources))
        log.debug("appkernels filter: " + str(appkernels))
        log.debug("days: " + str(days_old))
        log.debug("dry_run: " + str(self.dry_run))
        log.debug("comp_task_dir: " + str(self.comp_task_dir))

        timenow = datetime.datetime.now()
        seconds_in_day = 24 * 3600
        count = 0
        for task_dir in self.get_tasks_dir_list(resources, appkernels):
            try:
                time_stamp = os.path.basename(task_dir)
                activate_time = time_stamp_to_datetime(time_stamp)
                days_passed = (timenow -
                               activate_time).total_seconds() / seconds_in_day
                if days_passed < days_old:
                    continue

                workdir_dir = os.path.join(task_dir, "jobfiles", "workdir")
                if not os.path.isdir(workdir_dir):
                    continue

                if log.verbose:
                    print("Found workdir:", workdir_dir)

                count += 1
                if not self.dry_run:
                    shutil.rmtree(workdir_dir)
            except Exception:
                # not a bare except: Ctrl-C and SystemExit must still be
                # able to stop a long clean-up
                log.error("Cannot process: " + task_dir)
        log.info("Removed %d task workdirs" % count)
Exemplo n.º 30
0
    def handler(args):
        """
        Archive old completed tasks, or run the cron variant.

        With ``args.cron`` set to True only run_akrr_for_cron() is called.
        Otherwise the limits are validated (pickle_days must not exceed
        archive_days, archive_months must be at least 1; a violation is
        logged and terminates the process with exit status 1) and then state
        dumps are removed, tasks are archived, and months are archived, each
        on a new Archive instance.
        """
        from akrr.util import log
        from akrr.daemon import get_daemon_pid, daemon_start, daemon_stop

        if args.cron is True:
            run_akrr_for_cron()
            return

        log.info("Archiving old completed tasks")
        from akrr.archive import Archive

        if args.pickle_days > args.archive_days:
            log.error(
                "pickle_days should be less or equal to archive_days")
            exit(1)
        if args.archive_months < 1:
            log.error("archive_months should be at least 1")
            exit(1)

        # a fresh Archive object for every step
        Archive().remove_tasks_state_dumps(days_old=args.pickle_days)
        Archive().archive_tasks(days_old=args.archive_days)
        Archive().archive_tasks_by_months(months_old=args.archive_months)