示例#1
0
def wall_time_parsed(args):
    """
    Update or list wall time records through the AKRR REST API.

    When ``args.list`` is set the records are queried with a GET request;
    otherwise one POST request is sent per node count found in
    ``args.nodes`` (a single value or a comma-separated list).

    :param args: parsed command line arguments providing ``list``,
        ``resource``, ``appkernel``, ``nodes``, ``walltime`` and
        ``comments``.

    Exits with status 1 when not listing and any of resource, appkernel,
    nodes or walltime is missing. Errors raised while talking to the REST
    API are logged together with a traceback and are not re-raised.
    """
    listing = args.list
    resource = args.resource
    app = args.appkernel
    walltime = args.walltime
    comments = args.comments

    if not listing and not (resource and app and args.nodes and walltime):
        log.error(
            'Please provide a resource, app, node count and wall time.')
        exit(1)

    # The node count is only mandatory when updating, so it can be missing
    # in listing mode; splitting is done only when there is a value.
    if args.nodes:
        node_list = [node.strip() for node in str(args.nodes).split(',')]
    else:
        node_list = [None]

    url = '/walltime/%s/%s' % (resource, app)

    for node in node_list:
        data = {
            'resource_params': "{'nnodes':%d}" % (int(node),) if node else "{}",
            'app_param': '{}',
            'walltime': walltime,
            'comments': comments
        }
        try:
            from akrr import akrrrestclient

            if listing:
                result = akrrrestclient.get(url, data=data)
            else:
                result = akrrrestclient.post(url, data=data)

            if result.status_code != 200:
                log.error('something went wrong. %s:%s',
                          result.status_code,
                          result.text)
            elif listing:
                log.info(
                    'Successfully queried walltime records. \n%s',
                    result.text)
            else:
                # node is a string here, so it is formatted with %s; a %d
                # would raise and turn a successful update into an error.
                log.info('Successfully updated wall time (resource %s: application kernel: %s nodes: %s).' % (
                         resource, app, node))
        except Exception as e:
            import traceback
            log.error('''
            An error occured while communicating
            with the REST API.
            %s: %s
            '''.strip(),
                      e.args[0] if len(e.args) > 0 else '',
                      e.args[1] if len(e.args) > 1 else '')
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()
示例#2
0
def new_task_parsed(args):
    """
    Handles the appropriate execution of a 'New Task' mode request
    given the provided command line arguments.

    One task is posted to ``/scheduled_tasks`` for every node count in
    ``args.nodes`` (a single value or a comma-separated list). When both
    ``args.time_start`` and ``args.time_end`` are given, the start time of
    each task is obtained from ``calculate_random_start_time``.

    :param args: parsed command line arguments providing ``resource``,
        ``appkernel``, ``nodes``, ``start_time``, ``time_start``,
        ``time_end`` and ``periodicity``.

    Exits with status 1 when resource, appkernel or nodes is missing.
    Errors raised while talking to the REST API are logged, not re-raised.
    """
    if not (args.resource and args.appkernel and args.nodes):
        log.error('Please provide a resource, application and node count.')
        exit(1)
    resource = args.resource
    app = args.appkernel
    time_to_start = args.start_time
    time_start = args.time_start  # if args.time_start else '01:00'
    time_end = args.time_end  # if args.time_end else '05:00'
    repeat_in = args.periodicity
    # Split unconditionally: a value without a comma yields a single entry,
    # so a multi-digit count such as "16" is kept whole instead of being
    # broken into its characters.
    node_list = [node.strip() for node in args.nodes.split(',')]

    for node in node_list:
        if time_start is not None and time_end is not None:
            time_to_start = calculate_random_start_time(
                args.start_time, repeat_in, time_start, time_end)
        data = {
            'resource': resource,
            'app': app,
            'time_to_start': time_to_start,
            'repeat_in': repeat_in,
            'resource_param': "{'nnodes':%s}" % (node, )
        }
        try:
            from akrr import akrrrestclient

            result = akrrrestclient.post('/scheduled_tasks', data=data)
            if result.status_code == 200:
                log.info('Successfully submitted new task')
            else:
                log.error('something went wrong. %s:%s', result.status_code,
                          result.text)
        except Exception as e:
            log.error(
                '''
            An error occured while communicating
            with the REST API.
            %s: %s
            ''', e.args[0] if len(e.args) > 0 else '',
                e.args[1] if len(e.args) > 1 else '')
示例#3
0
def submit_test_job(resource, app_name="test", nodes=2):
    """
    Submit a test run of an application kernel through the AKRR REST API.

    The returned task id is also written to the file named by
    ``get_test_job_lock_filename(resource, app_name)``.

    :param resource: mapping describing the resource; its ``'name'`` entry
        is sent as the target resource.
    :param app_name: application kernel name to submit.
    :param nodes: node count, formatted as an integer into the request.
    :return: the ``task_id`` reported by the server.

    Exits with status 1 when the server answers with a status other than
    200. Any other failure is logged and the original exception is
    re-raised.
    """
    # submit test job
    r = None
    try:
        payload = {
            'resource': resource['name'],
            'app': app_name,
            'resource_param': "{'nnodes':%d}" % nodes,
            'task_param': "{'test_run':True}"
        }
        r = akrrrestclient.post('/scheduled_tasks', data=payload)
        if r.status_code != 200:
            # An error response is not guaranteed to carry JSON; fall back
            # to the raw body so the report itself cannot fail.
            try:
                server_response = json.dumps(r.json(), indent=4)
            except ValueError:
                server_response = r.text
            log.error(
                "Can not submit task through AKRR REST API ( %s )\nSee server response below\n%s\n",
                akrrrestclient.restapi_host, server_response)
            exit(1)
        task_id = r.json()['data']['data']['task_id']
    except Exception:
        if r is not None:
            # Decoding may be exactly what failed above; never let the
            # handler raise a second error that hides the first one.
            try:
                error_report = r.json()
            except ValueError:
                error_report = r.text
            log.critical(
                "Can not submit task through AKRR REST API ( %s )\n"
                "Is it still running?\nSee full error report below\n%s",
                akrrrestclient.restapi_host, error_report)
        else:
            log.critical(
                "Can not submit task through AKRR REST API ( %s )\n"
                "Is it still running?\n", akrrrestclient.restapi_host)
        raise

    # write file with task_id
    test_job_lock_filename = get_test_job_lock_filename(resource, app_name)
    with open(test_job_lock_filename, "w") as fout:
        print(task_id, file=fout)

    log.info("\nSubmitted test job to AKRR, task_id is %d\n", task_id)
    return task_id
示例#4
0
def task_new(resource,
             appkernel,
             nodes,
             time_to_start=None,
             periodicity=None,
             time_window_start=None,
             time_window_end=None,
             test_run=False,
             dry_run=False,
             gen_batch_job_only=False,
             app_param=None,
             task_param=None):
    """
    Handles the appropriate execution of a 'New Task' mode request
    given the provided command line arguments.

    One task is posted to ``/scheduled_tasks`` for every node count in
    ``nodes``.

    :param resource: resource name sent with the request.
    :param appkernel: application kernel name sent with the request.
    :param nodes: node count, or a comma-separated list of node counts.
    :param time_to_start: requested start time; normalised with
        ``get_formatted_time_to_start``.
    :param periodicity: value sent as ``repeat_in``.
    :param time_window_start: with ``time_window_end``, makes the start time
        of every task come from ``calculate_random_start_time``.
    :param time_window_end: see ``time_window_start``.
    :param test_run: add ``'test_run':True`` to the task parameters.
    :param dry_run: only report what would be submitted.
    :param gen_batch_job_only: call ``generate_batch_job_for_testing``
        instead of submitting.
    :param app_param: body of the ``app_param`` dictionary, without braces.
    :param task_param: extra task parameters, without braces.
    :raises AkrrValueException: when ``time_to_start`` can not be parsed.

    Exceptions raised while talking to the REST API are logged and
    re-raised.
    """
    import pprint
    from akrr.util.time import calculate_random_start_time, get_formatted_time_to_start

    node_list = [node.strip() for node in nodes.split(',')]

    if time_to_start is not None:
        time_to_start = get_formatted_time_to_start(time_to_start)
        if time_to_start is None:
            raise AkrrValueException(
                "Unknown date-time format for time to start!")

    # Every node count gets its start time drawn from the same requested
    # time, so one random draw is never used as the input of the next.
    requested_time_to_start = time_to_start
    use_time_window = time_window_start is not None and time_window_end is not None

    for node in node_list:
        if use_time_window:
            time_to_start = calculate_random_start_time(
                requested_time_to_start, periodicity, time_window_start,
                time_window_end)
        data = {
            'resource': resource,
            'app': appkernel,
            'time_to_start': time_to_start,
            'repeat_in': periodicity,
            'resource_param': "{'nnodes':%s}" % node
        }

        task_param_parts = []
        if test_run:
            task_param_parts.append("'test_run':True")
        if task_param is not None:
            task_param_parts.append(task_param)
        s_task_param = ",".join(task_param_parts)
        if s_task_param != "":
            data['task_param'] = "{%s}" % s_task_param

        if app_param is not None:
            data['app_param'] = "{%s}" % app_param

        log.debug("Trying to submit: " + pprint.pformat(data))

        if dry_run:
            log.dry_run(
                "Should submit following to REST API (POST to scheduled_tasks) %s"
                % data)

        if gen_batch_job_only:
            # Pass the node count of this iteration, not the raw
            # comma-separated argument.
            generate_batch_job_for_testing(resource,
                                           appkernel,
                                           node,
                                           dry_run=dry_run)

        if dry_run or gen_batch_job_only:
            continue

        try:
            from akrr import akrrrestclient
            import json

            result = akrrrestclient.post('/scheduled_tasks', data=data)

            if result.status_code == 200:
                data_out = json.loads(result.text)["data"]["data"]
                log.info(
                    'Successfully submitted new task. The task id is %s.' %
                    data_out["task_id"])
            else:
                log.error('something went wrong. %s:%s', result.status_code,
                          result.text)

        except Exception as e:
            log.error(
                '''
            An error occured while communicating
            with the REST API.
            %s: %s
            ''', e.args[0] if len(e.args) > 0 else '',
                e.args[1] if len(e.args) > 1 else '')
            raise
示例#5
0
文件: task_api.py 项目: nsimakov/akrr
def task_delete_selection(resource: str = None, appkernel: str = None, nodes: str = None, group_id: str = None,
                          active_tasks=False, scheduled_tasks=False):
    """
    Delete tasks matching a selection from the scheduled and/or active queues.

    The scheduler is asked not to start new tasks while the deletion runs and
    is asked to resume afterwards, also when the deletion fails.

    :param resource: only tasks for this resource.
    :param appkernel: only tasks for this application kernel, or for any of a
        comma-separated list of application kernels.
    :param nodes: only tasks whose ``resource_param`` holds this ``nnodes``
        value, or any of a comma-separated list of values.
    :param group_id: only tasks with this group id.
    :param active_tasks: remove matching tasks from ``active_tasks`` (through
        ``delete_task``).
    :param scheduled_tasks: remove matching rows from ``scheduled_tasks``.
    :raises AkrrValueException: when no selection criterion is given.
    :raises AkrrRestAPIException: when the scheduler can not be paused or
        resumed.
    :raises ValueError: when ``nodes`` holds something that is not an integer.
    """
    from akrr import akrrrestclient

    from akrr.db import get_akrr_db
    from akrr.daemon import delete_task
    import time

    if not (resource or appkernel or nodes or group_id):
        raise AkrrValueException("Something out of resource/appkernel/nodes/group id should be set!")

    # Parse node counts first so malformed input is rejected before the
    # scheduler is paused.
    node_list = [int(node.strip()) for node in nodes.split(',')] if nodes else []

    # Conditions use driver placeholders; the values travel separately so that
    # quotes inside names can not break or alter the statement.
    where = []
    where_values = []
    if resource:
        where.append("resource=%s")
        where_values.append(resource)
    if appkernel:
        appkernel_list = [ak.strip() for ak in appkernel.split(',')]
        where.append("app IN (" + ",".join(["%s"] * len(appkernel_list)) + ")")
        where_values.extend(appkernel_list)
    if group_id:
        where.append("group_id=%s")
        where_values.append(group_id)

    if node_list:
        selections = []
        for node in node_list:
            # nnodes is matched both as the last entry ("...}") and when
            # followed by another entry ("...,") of resource_param.
            for terminator in ("}", ","):
                selections.append((
                    where + ["resource_param LIKE %s"],
                    where_values + ["%'nnodes':" + str(node) + terminator + "%"]))
    else:
        selections = [(where, where_values)]

    db, cur = get_akrr_db(dict_cursor=True)

    # ask scheduler not to start new tasks
    if akrrrestclient.post('/scheduler/no_new_tasks').status_code != 200:
        raise AkrrRestAPIException("Can not post scheduler/no_new_tasks")

    try:
        if active_tasks:
            # Now we need to wait till scheduler will be done checking active tasks
            while True:
                sql = "SELECT task_id FROM active_tasks WHERE task_lock > 0"
                log.debug(sql)
                cur.execute(sql)
                n_active_checking_task = len(cur.fetchall())
                if n_active_checking_task == 0:
                    break
                log.info("There are %d task which daemon is actively working on, waiting for it to pause.",
                         n_active_checking_task)
                time.sleep(5)
            # now daemon is not working on any tasks

        # now we can work with db
        active_tasks_ids = []
        for conditions, values in selections:
            where_sql = " AND ".join(conditions)
            if scheduled_tasks:
                sql = "DELETE FROM scheduled_tasks WHERE " + where_sql
                log.debug("%s (parameters: %s)", sql, values)
                cur.execute(sql, tuple(values))
            if active_tasks:
                sql = "SELECT task_id FROM active_tasks WHERE " + where_sql
                log.debug("%s (parameters: %s)", sql, values)
                cur.execute(sql, tuple(values))
                for row in cur.fetchall():
                    task_id = int(row['task_id'])
                    # a task matched by several selections is deleted once
                    if task_id not in active_tasks_ids:
                        active_tasks_ids.append(task_id)

        if active_tasks:
            if not active_tasks_ids:
                log.info("No active tasks to delete")
            else:
                for task_id in active_tasks_ids:
                    log.info("Deleting task_id %d", task_id)
                    delete_task(task_id, remove_from_scheduled_queue=False, remove_from_active_queue=True,
                                remove_derived_task=False)

        if scheduled_tasks or active_tasks:
            db.commit()
    finally:
        # ask scheduler can start new tasks now; done in finally so a failure
        # above does not leave the scheduler paused
        if akrrrestclient.post('/scheduler/new_tasks_on').status_code != 200:
            raise AkrrRestAPIException("Can not post scheduler/new_tasks_on")

    log.info("Done")
示例#6
0
文件: task_api.py 项目: nsimakov/akrr
def task_new(resource: str, appkernel: str, nodes: str, time_to_start=None, periodicity=None,
             time_window_start=None, time_window_end=None, test_run=False,
             dry_run:bool = False, gen_batch_job_only: bool = False, app_param=None, task_param=None,
             n_runs: int = 1, group_id: str = ""):
    """
    Handles the appropriate execution of a 'New Task' mode request
    given the provided command line arguments.

    One task is posted to ``/scheduled_tasks`` for every selected node count.

    :param resource: resource name sent with the request.
    :param appkernel: application kernel name, or ``"all"`` to repeat the
        call for every application kernel that is configured for and enabled
        on ``resource``.
    :param nodes: node count, a comma-separated list of node counts, or
        ``"all"`` to take ``num_of_nodes`` from the application kernel
        configuration (the per-resource value when present).
    :param time_to_start: requested start time; normalised with
        ``get_formatted_time_to_start``.
    :param periodicity: value sent as ``repeat_in``.
    :param time_window_start: with ``time_window_end``, makes the start time
        of every task come from ``calculate_random_start_time``.
    :param time_window_end: see ``time_window_start``.
    :param test_run: add ``'test_run':True`` to the task parameters.
    :param dry_run: only report what would be submitted.
    :param gen_batch_job_only: call ``generate_batch_job_for_testing``
        instead of submitting.
    :param app_param: body of the ``app_param`` dictionary, without braces.
    :param task_param: extra task parameters, without braces.
    :param n_runs: when larger than one, sent as ``'n_runs'`` task parameter.
    :param group_id: when not empty, sent as ``group_id``.
    :raises AkrrValueException: for an unknown appkernel/resource with
        ``nodes == "all"``, an unparsable ``time_to_start``, or ``n_runs``
        larger than one combined with ``periodicity``.

    Exceptions raised while talking to the REST API are logged and
    re-raised.
    """
    import pprint
    from akrr.util.time import calculate_random_start_time, get_formatted_time_to_start

    if appkernel == "all":
        import akrr.cfg
        import akrr.app
        appkernel_list = []
        resource_app_enabled = akrr.app.app_get_enabled()
        for ak in akrr.cfg.apps.keys():
            if resource not in akrr.cfg.apps[ak]['appkernel_on_resource']:
                continue
            if resource not in resource_app_enabled:
                continue
            if ak not in resource_app_enabled[resource]["apps"]:
                continue
            if "resource_app_enabled" not in resource_app_enabled[resource]["apps"][ak]:
                continue
            if not resource_app_enabled[resource]["apps"][ak]["resource_app_enabled"]:
                continue
            appkernel_list.append(ak)

        for ak in appkernel_list:
            task_new(
                resource, ak, nodes, time_to_start=time_to_start, periodicity=periodicity,
                time_window_start=time_window_start, time_window_end=time_window_end, test_run=test_run,
                dry_run=dry_run, gen_batch_job_only=gen_batch_job_only, app_param=app_param, task_param=task_param,
                n_runs=n_runs, group_id=group_id)
        return

    if nodes == "all":
        import akrr.cfg
        if appkernel not in akrr.cfg.apps:
            raise AkrrValueException("Unknown appkernel %s" % appkernel)
        app_cfg = akrr.cfg.apps[appkernel]
        if resource not in app_cfg['appkernel_on_resource']:
            raise AkrrValueException("Unknown resource %s for appkernel %s" % (resource, appkernel))
        # a per-resource node list takes precedence over the appkernel-wide one
        if "num_of_nodes" in app_cfg['appkernel_on_resource'][resource]:
            node_list = app_cfg['appkernel_on_resource'][resource]['num_of_nodes']
        else:
            node_list = app_cfg['num_of_nodes']
    else:
        node_list = [node.strip() for node in nodes.split(',')]

    if time_to_start is not None:
        time_to_start = get_formatted_time_to_start(time_to_start)
        if time_to_start is None:
            raise AkrrValueException("Unknown date-time format for time to start!")

    if n_runs > 1 and periodicity:
        raise AkrrValueException("n_runs larger than one can not be set with periodicity")

    # Every node count gets its start time drawn from the same requested
    # time, so one random draw is never used as the input of the next.
    requested_time_to_start = time_to_start
    use_time_window = time_window_start is not None and time_window_end is not None

    for node in node_list:
        if use_time_window:
            time_to_start = calculate_random_start_time(
                requested_time_to_start,
                periodicity,
                time_window_start,
                time_window_end)
        data = {
            'resource': resource,
            'app': appkernel,
            'time_to_start': time_to_start,
            'repeat_in': periodicity,
            'resource_param': "{'nnodes':%s}" % node
        }

        task_param_parts = []
        if test_run:
            task_param_parts.append("'test_run':True")
        if n_runs > 1:
            task_param_parts.append("'n_runs':%d" % n_runs)
        if task_param is not None:
            task_param_parts.append(task_param)
        s_task_param = ",".join(task_param_parts)
        if s_task_param != "":
            data['task_param'] = "{%s}" % s_task_param

        if group_id != "":
            data['group_id'] = group_id

        if app_param is not None:
            data['app_param'] = "{%s}" % app_param

        log.debug("Trying to submit: "+pprint.pformat(data))

        if dry_run:
            log.dry_run("Should submit following to REST API (POST to scheduled_tasks) %s" % data)

        if gen_batch_job_only:
            # Pass the node count of this iteration; the raw argument may be
            # "all" or a comma-separated list.
            generate_batch_job_for_testing(resource, appkernel, node, dry_run=dry_run)

        if dry_run or gen_batch_job_only:
            continue

        try:
            from akrr import akrrrestclient
            import json

            result = akrrrestclient.post(
                '/scheduled_tasks',
                data=data)

            if result.status_code == 200:
                data_out = json.loads(result.text)["data"]["data"]
                log.info('Successfully submitted new task. The task id is %s.' % data_out["task_id"])
            else:
                log.error(
                    'something went wrong. %s:%s',
                    result.status_code,
                    result.text)

        except Exception as e:
            log.error('''
            An error occured while communicating
            with the REST API.
            %s: %s
            ''', e.args[0] if len(e.args) > 0 else '', e.args[1] if len(e.args) > 1 else '')
            raise
示例#7
0
def app_validate(resource, appkernel, nnodes):
    from akrr.util.log import verbose
    resource_name = resource
    app_name = appkernel

    error_count = 0
    warning_count = 0

    log.info("Validating " + app_name +
             " application kernel installation on " + resource_name)

    from akrr import get_akrr_dirs

    akrr_dirs = get_akrr_dirs()

    default_resource_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['default_dir'], "default.resource.conf"))
    resource_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['cfg_dir'], "resources", resource_name,
                     "resource.conf"))

    default_app_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['default_dir'], "default.app.conf"))
    app_ker_param_filename = os.path.abspath(
        os.path.join(akrr_dirs['default_dir'], app_name + ".app.conf"))
    ###############################################################################################
    # validating resource parameter file

    log.info("#" * 80)
    log.info("Validating %s parameters from %s" %
             (resource_name, resource_param_filename))

    if not os.path.isfile(resource_param_filename):
        log.error("resource parameters file (%s) do not exists!" %
                  (resource_param_filename, ))
        exit(1)

    # check syntax
    try:
        tmp = {}
        exec(
            compile(
                open(default_resource_param_filename).read(),
                default_resource_param_filename, 'exec'), tmp)
        exec(
            compile(
                open(resource_param_filename).read(), resource_param_filename,
                'exec'), tmp)
    except Exception:
        log.exception("Can not load resource from "
                      "" + resource_param_filename + "\n" +
                      "Probably invalid syntax.")
        exit(1)
    # check syntax
    try:
        tmp = {}
        exec(
            compile(
                open(default_app_param_filename).read(),
                default_app_param_filename, 'exec'), tmp)
        exec(
            compile(
                open(app_ker_param_filename).read(), app_ker_param_filename,
                'exec'), tmp)
    except Exception:
        log.exception("Can not load application kernel from "
                      "" + app_ker_param_filename + "\n" +
                      "Probably invalid syntax")
        exit(1)

    # now we can load akrr
    from akrr import cfg
    from akrr import akrrrestclient
    from akrr.cli.resource_deploy import make_results_summary
    from akrr.cfg_util import load_app_default, load_app_on_resource

    resource = cfg.find_resource_by_name(resource_name)
    log.info(
        "Syntax of %s is correct and all necessary parameters are present." %
        resource_param_filename)

    cfg.find_app_by_name(app_name)
    try:
        app_default = load_app_default(app_name)
        app = load_app_on_resource(app_name, resource_name, resource,
                                   app_default)

        pprint.pprint(app)
    except Exception as e:  # pylint: disable=broad-except
        log.exception("Exception occurred during updated app loading:" +
                      str(e))
        exit(1)
    log.info(
        "Syntax of %s is correct and all necessary parameters are present." %
        app_ker_param_filename)

    # check if AK is in DB
    if True:
        # add entry to mod_appkernel.resource
        db_ak, cur_ak = akrr.db.get_ak_db(True)

        cur_ak.execute(
            '''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''',
            (app_name, ))
        ak_in_akdb = cur_ak.fetchall()
        if len(ak_in_akdb) == 0:
            cur_ak.execute(
                '''INSERT INTO app_kernel_def (name,ak_base_name,processor_unit,enabled, description, visible)
                        VALUES(%s,%s,'node',0,%s,0);''',
                (app_name, app_name, app_name))
            db_ak.commit()
        cur_ak.execute(
            '''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''',
            (app_name, ))
        ak_in_akdb = cur_ak.fetchall()[0]
        # add entry to mod_akrr.resource
        db, cur = akrr.db.get_akrr_db(True)

        cur.execute('''SELECT * FROM app_kernels WHERE name=%s''',
                    (app_name, ))
        ak_in_db = cur.fetchall()
        if len(ak_in_db) == 0:
            cur.execute(
                '''INSERT INTO app_kernels (id,name,enabled,nodes_list)
                        VALUES(%s,%s,0,'1,2,4,8');''',
                (ak_in_akdb['ak_def_id'], app_name))
            db.commit()

    ###############################################################################################
    # connect to resource
    log.info("#" * 80)
    log.info("Validating resource accessibility. Connecting to %s." %
             (resource['name']))
    if resource['ssh_private_key_file'] is not None and os.path.isfile(
            resource['ssh_private_key_file']) is False:
        log.error("Can not access ssh private key (%s)"
                  "" % (resource['ssh_private_key_file'], ))
        exit(1)

    str_io = io.StringIO()
    try:
        sys.stdout = sys.stderr = str_io
        # Connect to resource
        # Spin-up instance before ssh it
        if resource['batch_scheduler'].lower() == "openstack":
            # Start instance if it is cloud
            openstack_server = akrr.util.openstack.OpenStackServer(
                resource=resource)
            resource['openstack_server'] = openstack_server
            openstack_server.create()
            resource['remote_access_node'] = openstack_server.ip
        if resource['batch_scheduler'].lower() == "googlecloud":
            # Start instance if it is cloud
            googlecloud_server = akrr.util.googlecloud.GoogleCloudServer(
                resource=resource)
            resource['googlecloud_server'] = googlecloud_server
            googlecloud_server.create()
            resource['remote_access_node'] = googlecloud_server.ip

        rsh = akrr.util.ssh.ssh_resource(resource)

        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__
    except Exception as e:
        msg2 = str_io.getvalue()
        msg2 += "\n" + traceback.format_exc()
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__
        msg = "Can not connect to """ + resource['name'] + "\n" + \
              "Probably invalid credential, see full error report below\n" + msg2
        log.error(msg)
        raise e
    print("=" * 80)
    log.info("Successfully connected to %s\n\n" % (resource['name']))

    ###############################################################################################
    log.info("Checking directory locations\n")

    d = resource['akrr_data']
    log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
    status, msg = check_dir(rsh, d, exit_on_fail=True, try_to_create=True)
    log.info(msg + "\n")

    d = resource['appkernel_dir']
    log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
    status, msg = check_dir(rsh, d, exit_on_fail=True, try_to_create=True)
    log.info(msg + "\n")

    d = resource['network_scratch']
    log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
    status, msg = check_dir(rsh, d, exit_on_fail=False, try_to_create=False)
    if status is True:
        log.info(msg)
    else:
        log.warning(msg)
        log.warning(
            ("WARNING %d: network scratch might be have a different location "
             + "on head node, so if it is by design it is ok") %
            (warning_count + 1))
        warning_count += 1
    log.info("")

    d = resource['local_scratch']
    log.info("Checking: %s:%s" % (resource['remote_access_node'], d))
    status, msg = check_dir(rsh, d, exit_on_fail=False, try_to_create=False)
    if status is True:
        log.info(msg)
    else:
        log.warning(msg)
        log.warning(
            ("WARNING %d: local scratch might be have a different location " +
             "on head node, so if it is by design it is ok") %
            (warning_count + 1))
        warning_count += 1
    log.info("")

    # close connection we don't need it any more
    rsh.close(force=True)
    del rsh

    # Delete openstack instance after tests
    if resource['batch_scheduler'].lower() == "openstack":
        # delete instance if it is cloud
        resource['openstack_server'].delete()
        resource['remote_access_node'] = None
    if resource['batch_scheduler'].lower() == "googlecloud":
        # delete instance if it is cloud
        resource['googlecloud_server'].delete()
        resource['remote_access_node'] = None

    ###############################################################################################
    # send test job to queue

    log.info("#" * 80)
    log.info(
        "Will send test job to queue, wait till it executed and will analyze the output"
    )

    print("Will use AKRR REST API at", akrrrestclient.restapi_host)
    # get check connection
    try:
        r = akrrrestclient.get('/scheduled_tasks')
        if r.status_code != 200:
            log.error(
                "Can not get token for AKRR REST API ( "
                "" + akrrrestclient.restapi_host + " )\n" +
                "See server response below:\n %s",
                json.dumps(r.json(), indent=4))
            exit(1)
    except Exception:
        log.error("Can not connect to AKRR REST API ( "
                  "" + akrrrestclient.restapi_host + " )\n" +
                  "Is it running?\n" + "See full error report below:\n" +
                  traceback.format_exc())
        exit(1)

    # check if the test job is already submitted
    task_id = None
    test_job_lock_filename = os.path.join(
        cfg.data_dir, resource_name + "_" + app_name + "_test_task.dat")
    if os.path.isfile(test_job_lock_filename):
        fin = open(test_job_lock_filename, "r")
        task_id = int(fin.readline())
        fin.close()

        r = akrrrestclient.get('/tasks/' + str(task_id))
        if r.status_code != 200:
            task_id = None
        else:
            log.warning(
                "\nWARNING %d: Seems this is rerun of this script, will monitor task with task_id = "
                % (warning_count + 1) + str(task_id))
            log.warning("To submit new task delete " + test_job_lock_filename +
                        "\n")
            warning_count += 1
        # check how old is it
    # submit test job
    if task_id is None:
        try:
            payload = {
                'resource': resource_name,
                'app': app_name,
                'resource_param': "{'nnodes':%d}" % nnodes,
                'task_param': "{'test_run':True}"
            }
            r = akrrrestclient.post('/scheduled_tasks', data=payload)
            if r.status_code != 200:
                log.error(
                    "Can not submit task through AKRR REST API ( "
                    "" + akrrrestclient.restapi_host +
                    " )\n" + "See server response below",
                    json.dumps(r.json(), indent=4))
                exit(1)
            task_id = r.json()['data']['data']['task_id']
        except Exception:
            log.error("Can not submit task through AKRR REST API ( "
                      "" + akrrrestclient.restapi_host + " )\n" +
                      "Is it still running?\n" +
                      "See full error report below:\n" +
                      traceback.format_exc())
            exit(1)
        # write file with tast_id
        fout = open(os.path.join(test_job_lock_filename), "w")
        print(task_id, file=fout)
        fout.close()
        log.info("\nSubmitted test job to AKRR, task_id is " + str(task_id) +
                 "\n")
    # now wait till job is done
    msg_body0 = ""
    while True:
        t = datetime.datetime.now()
        # try:
        r = akrrrestclient.get('/tasks/' + str(task_id))

        if r.status_code == 200:
            response_json = r.json()

            msg_body = "=" * 80
            msg_body += "\nTast status:\n"

            if response_json["data"]["queue"] == "scheduled_tasks":
                msg_body += "Task is in scheduled_tasks queue.\n"
                msg_body += "It schedule to be started on " + response_json[
                    "data"]["data"]['time_to_start'] + "\n"
            elif response_json["data"]["queue"] == "active_tasks":
                msg_body += "Task is in active_tasks queue.\n"
                msg_body += "Status: " + str(
                    response_json["data"]["data"]['status']) + "\n"
                msg_body += "Status info:\n" + str(
                    response_json["data"]["data"]['status_info']) + "\n"
            elif response_json["data"]["queue"] == "completed_tasks":
                msg_body += "Task is completed!\n"
                completed_tasks = r.json()['data']['data']['completed_tasks']
                akrr_xdmod_instanceinfo = r.json(
                )['data']['data']['akrr_xdmod_instanceinfo']
                if verbose:
                    msg_body += "completed_tasks table entry:\n" + pp.pformat(
                        completed_tasks) + "\n"
                    msg_body += "akrr_xdmod_instanceinfo table entry:\n" + pp.pformat(
                        akrr_xdmod_instanceinfo) + "\n"
                    msg_body += 'output parsing results:\n' + akrr_xdmod_instanceinfo[
                        'body'] + "\n"
                else:
                    msg_body += "\tstatus: " + str(
                        akrr_xdmod_instanceinfo['status']) + "\n"
                    if akrr_xdmod_instanceinfo['status'] == 0:
                        msg_body += "\tstatus2: " + completed_tasks[
                            'status'] + "\n"
                    msg_body += "\tstatus_info: " + completed_tasks[
                        'status_info'] + "\n"
            else:
                msg_body += r.text + "\n"

            tail_msg = "time: " + t.strftime("%Y-%m-%d %H:%M:%S")

            if msg_body != msg_body0:
                print("\n\n" + msg_body)
                print(tail_msg, end=' ')
                sys.stdout.flush()
            else:
                print("\r" + tail_msg, end=' ')
                sys.stdout.flush()

            msg_body0 = copy.deepcopy(msg_body)

            if response_json["data"]["queue"] == "completed_tasks":
                break
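        # best-effort: PUT an empty next_check_time for this active task
        # (presumably to prompt the server to re-check it soon -- confirm);
        # any failure here is deliberately ignored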
        try:
            payload = {'next_check_time': ''}
            akrrrestclient.put('/active_tasks/' + str(task_id), data=payload)
        except Exception:
            pass
        time.sleep(5)
    ###############################################################################################
    # analysing the output
    log.info("Test job is completed analyzing output\n")
    r = akrrrestclient.get('/tasks/' + str(task_id))
    if r.status_code != 200:
        log.error(
            "Can not get information about task\n" +
            "See full error report below", "AKRR server response:\n" + r.text)
        exit(1)
    completed_tasks = r.json()['data']['data']['completed_tasks']
    akrr_xdmod_instanceinfo = r.json(
    )['data']['data']['akrr_xdmod_instanceinfo']
    akrr_errmsg = r.json()['data']['data']['akrr_errmsg']

    results_summary = make_results_summary(resource_name, app_name,
                                           completed_tasks,
                                           akrr_xdmod_instanceinfo,
                                           akrr_errmsg)
    # the completed task's status string contains "ERROR"
    if completed_tasks['status'].count("ERROR") > 0:
        if completed_tasks['status'].count(
                "ERROR Can not created batch job script and submit it to remote queue"
        ) > 0:
            log.error(
                "Can not created batch job script and/or submit it to remote queue\n"
                + "See full error report below:\n" + results_summary)
            os.remove(test_job_lock_filename)
            exit(1)
        else:
            log.error(completed_tasks['status'] + "\n" +
                      "See full error report below:\n" + results_summary)
            os.remove(test_job_lock_filename)
            exit(1)

    # akrr_xdmod_instanceinfo marks the run as unsuccessful (status == 0)
    if akrr_xdmod_instanceinfo['status'] == 0:
        log.error("Task execution was not successful\n" +
                  "See full error report below:\n" + results_summary)
        os.remove(test_job_lock_filename)
        exit(1)
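    # sanity-check the report: the body must parse as XML and contain a
    # <benchmark> element (the lookup results themselves are not used)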
    elm_perf = XMLElementTree.fromstring(akrr_xdmod_instanceinfo['body'])
    elm_perf.find('benchmark').find('parameters')
    elm_perf.find('benchmark').find('statistics')

    log.info("\nTest kernel execution summary:")
    print(results_summary)
    print()
    # log.info("\nThe output looks good.\n")
    if error_count == 0:
        # enabling resource for execution
        log.info("\nEnabling %s on %s for execution\n" %
                 (app_name, resource_name))
        try:
            result = akrrrestclient.put('/resources/%s/on' % (resource_name, ),
                                        data={'application': app_name})
            if result.status_code == 200:
                log.info("Successfully enabled %s on %s" %
                         (app_name, resource_name))
            else:
                if result is not None:
                    log.error(
                        "Can not turn-on %s on %s" % (app_name, resource_name),
                        result.text)
                else:
                    log.error("Can not turn-on %s on %s" %
                              (app_name, resource_name))
                exit(1)
            if True:
                # make sure the app kernel has an entry in the app_kernel_def
                # table (DB from akrr.db.get_ak_db), then enable it and make it visible
                db_ak, cur_ak = akrr.db.get_ak_db(True)

                cur_ak.execute(
                    '''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''',
                    (app_name, ))
                ak_in_akdb = cur_ak.fetchall()
                if len(ak_in_akdb) == 0:
                    cur_ak.execute(
                        "INSERT INTO app_kernel_def (name,ak_base_name,processor_unit,enabled, description, visible)"
                        "VALUES(%s,%s,'node',0,%s,0);",
                        (app_name, app_name, app_name))
                    db_ak.commit()
                cur_ak.execute(
                    '''UPDATE app_kernel_def SET enabled=1,visible=1  WHERE ak_base_name=%s''',
                    (app_name, ))
                db_ak.commit()
                # make sure the app kernel has an entry in the app_kernels
                # table (DB from akrr.db.get_akrr_db), then enable it
                db, cur = akrr.db.get_akrr_db(True)

                cur.execute('''SELECT * FROM app_kernels WHERE name=%s''',
                            (app_name, ))
                ak_in_db = cur.fetchall()
                if len(ak_in_db) == 0:
                    cur.execute(
                        '''INSERT INTO app_kernels (id,name,enabled,nodes_list)
                                VALUES(%s,%s,0,'1,2,4,8');''',
                        (ak_in_akdb['ak_def_id'], app_name))
                    db.commit()
                cur.execute(
                    '''UPDATE app_kernels SET enabled=1  WHERE name=%s''',
                    (app_name, ))
                db.commit()
        except Exception:
            log.exception("Can not turn-on %s on %s", app_name, resource_name)
            exit(1)

    if error_count > 0:
        log.error("There are %d errors, fix them.", error_count)
    if warning_count > 0:
        log.warning(
            "\nThere are %d warnings.\nif warnings have sense (highlighted in yellow), you can move to next step!\n"
            % warning_count)
    if error_count == 0 and warning_count == 0:
        log.info("\nDONE, you can move to next step!\n")
    os.remove(test_job_lock_filename)