Exemplo n.º 1
0
def save_reboot_state(db_action, db):
    """Store the state to resume from in the node's 'reboot_state' property.

    The stored value has the form '<process>?!<state index>'. When the action
    has a state index, the current process and index are recorded. Otherwise
    (hard reboot initiated by the user) a value is only recorded if the node
    has a schedule entry in the 'deployed' action state; it then points to the
    last state of the matching 'deploy' process.

    Args:
        db_action: the action being rebooted (reads node_name, state_idx,
            process and environment).
        db: the database session used for the queries and the insertion.
    """
    node_name = db_action.node_name
    saved_state = db.query(ActionProperty).filter(
        ActionProperty.node_name == node_name).filter(
            ActionProperty.prop_name == "reboot_state").first()
    if db_action.state_idx is not None:
        state_str = "%s?!%d" % (db_action.process, db_action.state_idx)
    else:
        # Hard reboot initiated by the user: only meaningful for deployed nodes
        state_str = ""
        deployed = db.query(Schedule).filter(
            Schedule.node_name == node_name).filter(
                Schedule.action_state == "deployed").first()
        if deployed is not None:
            for proc in PROCESS["deploy"]:
                envs = proc["environments"]
                # An empty environment list matches every environment; the
                # last matching process wins
                if len(envs) == 0 or db_action.environment in envs:
                    state_str = "deploy?!%d" % (len(proc["states"]) - 1)
    if len(state_str) == 0:
        # Nothing to remember
        return
    if saved_state is not None:
        # Overwrite the previously saved state
        saved_state.prop_value = state_str
        return
    # No saved state yet: create the property with the owner of the node's
    # existing properties
    owner_email = db.query(ActionProperty).filter(
        ActionProperty.node_name == node_name).first().owner
    new_prop = ActionProperty()
    new_prop.node_name = node_name
    new_prop.prop_name = "reboot_state"
    new_prop.prop_value = state_str
    new_prop.owner = owner_email
    db.add(new_prop)
Exemplo n.º 2
0
def env_copy_exec(action, db):
    """Start writing the environment image to the SD card of the node.

    Connects to the node over SSH and launches, in the background, a pipeline
    that fetches the image from the pimaster, decompresses it and writes it to
    /dev/mmcblk0 while recording the progress in 'progress-<node>.txt'. The
    'percent' action property of the node is then reset to 0.

    Args:
        action: the action being executed (reads node_name and environment).
        db: the database session.

    Returns:
        Always True; an SSH failure is only logged as a warning.
    """
    env_path = get_config()["env_path"]
    node_ip = db.query(RaspNode).filter(
        RaspNode.name == action.node_name).first().ip
    # WARN: the pimaster SSH user is in pimaster.switch (sorry)
    pimaster = db.query(RaspNode).filter(RaspNode.name == "pimaster").first()
    env = db.query(RaspEnvironment).filter(
        RaspEnvironment.name == action.environment).first()
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(node_ip, username="******", timeout=SSH_TIMEOUT)
        # Get the path to the IMG file
        img_path = env_path + env.img_name
        logging.info("[%s] copy %s to the SDCARD" %
                     (action.node_name, img_path))
        # Write the image of the environment on SD card
        deploy_cmd = "rsh -o StrictHostKeyChecking=no %s@%s 'cat %s' | tar xzOf - | \
            pv -n -p -s %s 2> progress-%s.txt | dd of=/dev/mmcblk0 bs=4M conv=fsync &" % (
            pimaster.switch, pimaster.ip, img_path, env.img_size,
            action.node_name)
        (stdin, stdout, stderr) = ssh.exec_command(deploy_cmd)
        # Wait for the remote shell to return (the pipeline itself runs in
        # the background)
        stdout.channel.recv_exit_status()
        # Reset the progress of the copy
        act_prop = db.query(ActionProperty).filter(
            ActionProperty.node_name == action.node_name).filter(
                ActionProperty.prop_name == "percent").first()
        if act_prop is None:
            owner_email = db.query(ActionProperty).filter(
                ActionProperty.node_name == action.node_name).first().owner
            act_prop = ActionProperty()
            act_prop.node_name = action.node_name
            act_prop.prop_name = "percent"
            act_prop.prop_value = 0
            act_prop.owner = owner_email
            db.add(act_prop)
        else:
            act_prop.prop_value = 0
    except (BadHostKeyException, AuthenticationException, SSHException,
            socket.error):
        logging.warning("[%s] SSH connection failed" % action.node_name)
    finally:
        # Release the connection on every path, including failures
        ssh.close()
    return True
Exemplo n.º 3
0
def deploy_exec(action, db):
    """Submit the deployment of the configured environment for a running job.

    Looks for the running job whose UID equals action.node_name and, once
    nodes are assigned to it, creates a deployment with the 'environment'
    (and optional 'ssh_key') action properties. The deployment UID is stored
    in the 'deployment' action property (created or updated).

    Args:
        action: the action being executed (reads node_name).
        db: the database session.

    Returns:
        True when the deployment has been created, False when no matching
        running job with assigned nodes exists or when the creation failed.
    """
    g5k_info = g5k_connect(action, db)
    g5k_site = g5k_info[0]
    g5k_user = g5k_info[1]
    for j in g5k_site.jobs.list(state="running", user=g5k_user):
        if str(j.uid) != action.node_name:
            continue
        j.refresh()
        if len(j.assigned_nodes) == 0:
            # No node assigned to this job yet
            continue
        env = db.query(ActionProperty).filter(
            ActionProperty.node_name == action.node_name).filter(
                ActionProperty.prop_name == "environment").first()
        ssh_key = db.query(ActionProperty).filter(
            ActionProperty.node_name == action.node_name).filter(
                ActionProperty.prop_name == "ssh_key").first()
        old_dep = db.query(ActionProperty).filter(
            ActionProperty.node_name == action.node_name).filter(
                ActionProperty.prop_name == "deployment").first()
        logging.info("[%s] deploy the environment '%s'" %
                     (action.node_name, env.prop_value))
        deployment_conf = {
            "nodes": j.assigned_nodes,
            "environment": env.prop_value
        }
        if ssh_key is not None and len(ssh_key.prop_value) > 0:
            deployment_conf["key"] = ssh_key.prop_value
        try:
            dep = g5k_site.deployments.create(deployment_conf)
            if old_dep is None:
                # Create an action property to register the deployment UID
                uid_prop = ActionProperty()
                uid_prop.node_name = action.node_name
                uid_prop.prop_name = "deployment"
                uid_prop.prop_value = dep.uid
                uid_prop.owner = env.owner
                db.add(uid_prop)
            else:
                # Update the deployment UID (node_deployagain probably happens)
                old_dep.prop_value = dep.uid
            return True
        except Exception:
            # 'Exception' rather than a bare except so that KeyboardInterrupt
            # and SystemExit are not swallowed
            logging.exception("Deployment error: ")
            return False
    return False
Exemplo n.º 4
0
def system_conf_exec(action, db):
    """Customize the freshly written system of the node over SSH.

    Reads the 'os_password' action property of the node (generating and
    storing a new password when it does not exist), then runs on the node the
    commands matching its environment: hostname, password, SSH activation and
    authorized keys.

    Args:
        action: the action being executed (reads node_name, node_ip and
            environment).
        db: the database session.

    Returns:
        Always True; an SSH failure is only logged as a warning.
    """
    pwd = db.query(ActionProperty).filter(
        ActionProperty.node_name == action.node_name).filter(
            ActionProperty.prop_name == "os_password").first()
    if pwd is None:
        # Generate the password
        os_password = new_password()
        owner_email = db.query(ActionProperty).filter(
            ActionProperty.node_name == action.node_name).first().owner
        act_prop = ActionProperty()
        act_prop.node_name = action.node_name
        act_prop.prop_name = "os_password"
        act_prop.prop_value = os_password
        act_prop.owner = owner_email
        db.add(act_prop)
    else:
        os_password = pwd.prop_value
    node_name = action.node_name
    environment = action.environment
    # Commands shared by the environments that expose the root file system
    fs_commands = [
        # Set the hostname to modify the bash prompt
        "echo '%s' > fs_dir/etc/hostname" % node_name,
        # Create a ssh folder in the root folder of the SD CARD's file system
        "mkdir fs_dir/root/.ssh",
        # Add the public key of the server
        "cp /root/.ssh/authorized_keys fs_dir/root/.ssh/authorized_keys",
    ]
    # NOTE(review): os_password is interpolated unescaped into sed
    # expressions; a password containing '/' or quotes would break them.
    # The checks are independent on purpose: 'raspbian_cloud9' and
    # 'raspbian_ttyd' also match the 'raspbian' prefix.
    commands = []
    if environment.startswith("picore"):
        # Set the hostname to modify the bash prompt
        commands.append(
            "sed -i 's/$/ host=%s/g' boot_dir/cmdline3.txt" % node_name)
        commands.append(
            "sed -i 's/$/ host=%s/g' boot_dir/cmdline.txt" % node_name)
    if environment.startswith("ubuntu"):
        # Set the password of the 'ubuntu' user
        commands.append(
            "sed -i 's/tototiti/%s/' boot_dir/user-data" % os_password)
        commands.extend(fs_commands)
    if environment.startswith("raspbian"):
        # Create the ssh file in the boot partition to start SSH on startup
        commands.append("touch boot_dir/ssh")
        # Avoid the execution of the expand/resize script
        commands.append("sed -i 's:init=.*$::' boot_dir/cmdline.txt")
        commands.extend(fs_commands)
    if environment == "raspbian_cloud9":
        commands.append(
            "sed -i 's/-a :/-a admin:%s/' fs_dir/etc/systemd/system/cloud9.service"
            % os_password)
    if environment == "raspbian_ttyd":
        commands.append("sed -i 's/toto/%s/' fs_dir/etc/rc.local" % os_password)
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(action.node_ip, username="******", timeout=SSH_TIMEOUT)
        for cmd in commands:
            (stdin, stdout, stderr) = ssh.exec_command(cmd)
            # Wait for the end of the command before sending the next one
            stdout.channel.recv_exit_status()
    except (BadHostKeyException, AuthenticationException, SSHException,
            socket.error):
        logging.warning("[%s] SSH connection failed" % action.node_name)
    finally:
        # The connection was previously never closed
        ssh.close()
    return True
Exemplo n.º 5
0
def register_environment(arg_dict):
    """Start the 'reg_env' process that registers a new environment image.

    Args:
        arg_dict: request parameters; reads 'node_name', 'user', 'img_path'
            and 'env_name'.

    Returns:
        A JSON string with either an 'error' message (no reservation of the
        node for this user, or an image file with the same name already
        exists) or a 'success' message.
    """
    db = open_session()
    node = db.query(Schedule
            ).filter(Schedule.node_name == arg_dict["node_name"]
            ).filter(Schedule.owner == arg_dict["user"]
            ).first()
    if node is None:
        close_session(db)
        msg = "No reservation for the node '%s'" % arg_dict["node_name"]
        logging.error("[%s] %s" % (arg_dict["node_name"], msg))
        return json.dumps({ "error": msg })
    # Check the image file does not exist yet
    file_name = os.path.basename(arg_dict["img_path"])
    env_path = get_config()["env_path"]
    if os.path.exists("%s%s" % (env_path, file_name)):
        # Release the session on this error path too
        close_session(db)
        msg = "The image file '%s' already exists in the server. Please, rename this file." % file_name
        logging.error("[%s] %s" % (arg_dict["node_name"], msg))
        return json.dumps({ "error": msg })
    # Replace the existing action of the node (if any) with a 'reg_env' action
    node_action = db.query(Action).filter(Action.node_name == node.node_name).first()
    if node_action is not None:
        db.delete(node_action)
    node_action = new_action(node, db)
    init_action_process(node_action, "reg_env")
    db.add(node_action)
    # Delete old values
    prop_names = ["img_path", "env_name"]
    old_props = db.query(ActionProperty
        ).filter(ActionProperty.node_name == node.node_name
        ).filter(ActionProperty.prop_name.in_(prop_names)
        ).all()
    for p in old_props:
        db.delete(p)
    # Record the new values from the request
    for prop_name in prop_names:
        act_prop = ActionProperty()
        act_prop.node_name = node.node_name
        act_prop.prop_name = prop_name
        act_prop.prop_value = arg_dict[prop_name]
        act_prop.owner = node.owner
        db.add(act_prop)
    close_session(db)
    return json.dumps({ "success": "environment is registering" })
Exemplo n.º 6
0
def node_deploy(arg_dict):
    """Record the deployment configuration of the user's 'configuring' nodes.

    Args:
        arg_dict: request parameters; 'user' must contain an '@' and 'nodes'
            must map node names to dictionaries of properties (including
            'node_bin').

    Returns:
        A JSON string. On invalid parameters, a 'parameters' usage message.
        Otherwise a dictionary keyed by node name holding either the list of
        'missing' properties or the new 'state' of the node.
    """
    # Check the parameters
    error_msg = { "parameters":
            "user: '******', 'nodes': {'node-3': { 'node_bin': 'my_bin', 'environment': 'my-env' }}" }
    if "user" not in arg_dict or "@" not in arg_dict["user"] or "nodes" not in arg_dict:
        return json.dumps(error_msg)
    # Check the nodes dictionnary
    node_prop = arg_dict["nodes"]
    user_email = arg_dict["user"]
    if not isinstance(node_prop, dict) or \
            not all(isinstance(val, dict) for val in node_prop.values()):
        return json.dumps(error_msg)
    # Get the list of properties for the configuration
    conf_prop = CONFIGURE_PROP.copy()
    # The mandatory properties are the same for every node
    required = [ prop for prop in conf_prop if conf_prop[prop]["mandatory"] ]
    result = {}
    db = open_session()
    # Search the nodes to deploy in the schedule table (nodes in 'configuring' state)
    nodes = db.query(Schedule
            ).filter(Schedule.owner == user_email
            ).filter(Schedule.state == "configuring"
            ).all()
    for n in nodes:
        if n.node_name not in node_prop:
            continue
        my_prop = node_prop[n.node_name]
        # 'node_bin' is stored in the schedule, not as an action property, so
        # it is removed from the property dictionary. A request without it is
        # reported as incomplete instead of raising a KeyError.
        raw_bin = my_prop.pop("node_bin", None)
        missing = []
        if raw_bin is None:
            missing.append("node_bin")
        # Check required properties
        missing.extend(prop for prop in required if prop not in my_prop)
        if missing:
            result[n.node_name] = { "missing": missing }
            continue
        # Remove special characters and spaces from the node bin name
        node_bin = safe_string(raw_bin).replace(" ", "_")
        # Delete the existing configuration for this node
        existing = db.query(ActionProperty).filter(ActionProperty.node_name == n.node_name).all()
        for to_del in existing:
            db.delete(to_del)
        # Write the configuration to the database (empty values are skipped)
        for prop in my_prop:
            if len(my_prop[prop]) > 0:
                act_prop = ActionProperty()
                act_prop.node_name = n.node_name
                act_prop.prop_name = prop
                act_prop.owner = user_email
                if "ssh_key" in prop or "os_password" == prop:
                    # Keys and passwords are stored unchanged
                    act_prop.prop_value = my_prop[prop]
                else:
                    # Remove special characters and spaces from value
                    safe_value = safe_string(my_prop[prop])
                    act_prop.prop_value = safe_value.replace(" ", "_")
                db.add(act_prop)
        n.state = "ready"
        n.bin = node_bin
        logging.info("[%s] change state to 'ready'" % n.node_name)
        result[n.node_name] = { "state": n.state }
    close_session(db)
    return json.dumps(result)
Exemplo n.º 7
0
def node_reserve(arg_dict):
    """Reserve nodes by submitting a job through the grid5000 API.

    The nodes are selected from the server list with the 'filter' argument
    (by 'name', or by matching every given property), then checked against
    the existing reservations. The job is submitted for the selected nodes
    and the g5k credentials are stored in the 'g5k' action property of the
    job.

    Args:
        arg_dict: request parameters; requires 'filter' (including
            'nb_nodes', which is removed from the filter), 'user',
            'start_date' (timestamp in seconds), 'duration' (hours),
            'g5k_user' and 'g5k_password'.

    Returns:
        A JSON string: a 'parameters' usage message when an argument is
        missing, otherwise {'nodes': [...]} with the reserved node names
        (empty when no node is available or the job creation failed).
    """
    # Check arguments
    required_args = ("filter", "user", "start_date", "duration", "g5k_user",
                     "g5k_password")
    if any(name not in arg_dict for name in required_args):
        logging.error("Missing parameters: '%s'" % arg_dict)
        return json.dumps({
            "parameters": {
                "user": "******",
                "filter": "{...}",
                "start_date": 1623395254,
                "duration": 3,
                "g5k_password": "******",
                "g5k_user": "******"
            }
        })
    result = {"nodes": []}
    user = arg_dict["user"]
    f = arg_dict["filter"]
    # f = {'nb_nodes': '3', 'model': 'RPI4B8G', 'switch': 'main_switch'}
    nb_nodes = int(f["nb_nodes"])
    del f["nb_nodes"]
    start_date = arg_dict["start_date"]
    end_date = start_date + arg_dict["duration"] * 3600
    # Connect to the grid5000 API
    g5k_site = g5k_connect(arg_dict)
    # Get the node list
    servers = build_server_list(g5k_site)
    filtered_nodes = []
    if "name" in f:
        if f["name"] in servers:
            filtered_nodes.append(f["name"])
    elif len(f) == 0:
        # No filter: every node is a candidate
        filtered_nodes += servers.keys()
    else:
        # Keep the nodes matching every property of the filter
        for node in servers.values():
            if all(node[prop] == f[prop] for prop in f):
                filtered_nodes.append(node["name"])
    # Check the availability of the filtered nodes
    logging.info("Filtered nodes: %s" % filtered_nodes)
    selected_nodes = []
    node_status = {}
    # Move the start date back 15 minutes to give the time for destroying the previous reservation
    back_date = start_date - 15 * 60
    for node_name in filtered_nodes:
        cluster_name = node_name.split("-")[0]
        if cluster_name not in node_status:
            # The status of a cluster is retrieved once for all its nodes
            node_status[cluster_name] = status_to_reservations(
                g5k_site.clusters[cluster_name].status.list().nodes)
        ok_selected = True
        # Check the schedule of the existing reservations
        if node_name in node_status[cluster_name]:
            for reservation in node_status[cluster_name][node_name]:
                # Only one reservation for a specific node per user
                if reservation["owner"] == user:
                    ok_selected = False
                # There is no reservation at the same date: two intervals
                # overlap when each one starts before the other one ends
                # (this also covers a reservation starting exactly at
                # back_date)
                if back_date < reservation["end_date"] and \
                        end_date > reservation["start_date"]:
                    ok_selected = False
        if ok_selected:
            # Add the node to the reservation
            selected_nodes.append(node_name)
            if len(selected_nodes) == nb_nodes:
                # Exit when the required number of nodes is reached
                break
    logging.info("Selected nodes: %s" % selected_nodes)
    if not selected_nodes:
        # Nothing to reserve: do not submit a job for zero nodes
        return json.dumps(result)
    # Set the duration of the job
    walltime = "%s:00" % arg_dict["duration"]
    # Set a 'sleep' command that lasts 30 days, i.e, the maximum duration of this job.
    # This command allows to extend the reservation (see node_extend())
    command = "sleep %d" % (30 * 24 * 3600)
    job_conf = {
        "name": "piseduce %s" % datetime.now(),
        "resources": "nodes=%d,walltime=%s" % (len(selected_nodes), walltime),
        "command": command,
        "types": ["deploy"]
    }
    # Set the 'reservation' property to define the job's start date
    now = int(time.time())
    delta_s = start_date - now
    if delta_s > 5 * 60:
        # Only consider the start_date if this date is after the next 5 minutes
        local_date = datetime.fromtimestamp(start_date).astimezone(
            pytz.timezone("Europe/Paris"))
        # Strip the UTC offset from the string representation
        job_conf["reservation"] = str(local_date)[:-6]
    if len(selected_nodes) == 1:
        # Reserve the node from its server name
        logging.info("Reservation the node '%s' with the walltime '%s'" %
                     (selected_nodes[0], walltime))
        job_conf["properties"] = "(host in ('%s.%s.grid5000.fr'))" % (
            selected_nodes[0], g5k_site.uid)
    else:
        # Reserve the nodes from cluster names
        clusters = {node.split("-")[0] for node in selected_nodes}
        logging.info(
            "Reservation on the clusters '%s' with the walltime '%s'" %
            (clusters, walltime))
        job_conf["properties"] = "(cluster in (%s))" % ",".join(
            ["'%s'" % c for c in clusters])
    try:
        job = g5k_site.jobs.create(job_conf)
        result["nodes"] = selected_nodes
        # Store the g5k login/password to the DB in order to use it with agent_exec.py
        # NOTE(review): the password is stored in clear text
        db = open_session()
        g5k_cred = ActionProperty()
        g5k_cred.node_name = job.uid
        g5k_cred.owner = arg_dict["user"]
        g5k_cred.prop_name = "g5k"
        g5k_cred.prop_value = "%s/%s" % (arg_dict["g5k_user"],
                                         arg_dict["g5k_password"])
        db.add(g5k_cred)
        close_session(db)
    except Exception:
        # 'Exception' rather than a bare except so that KeyboardInterrupt
        # and SystemExit are not swallowed
        logging.exception("Creating job: ")
    return json.dumps(result)
Exemplo n.º 8
0
def node_mine(arg_dict):
    """Describe the jobs of the user that are past the 'configuring' state.

    Args:
        arg_dict: request parameters; requires 'user' (containing an '@'),
            'g5k_user' and 'g5k_password'.

    Returns:
        A JSON string: a 'parameters' usage message on invalid arguments,
        otherwise {'states': [...], 'nodes': {...}} where 'states' is the
        longest state list of the 'deploy' processes and 'nodes' describes
        each running or waiting job known in the schedule table.
    """
    has_user = "user" in arg_dict and "@" in arg_dict["user"]
    has_g5k = "g5k_user" in arg_dict and "g5k_password" in arg_dict
    if not (has_user and has_g5k):
        usage = {
            "parameters": {
                "user": "******",
                "g5k_user": "******",
                "g5k_password": "******"
            }
        }
        return json.dumps(usage)
    result = {"states": [], "nodes": {}}
    # Keep the longest state list among the 'deploy' processes
    states_module = import_module("%s.states" % get_config()["node_type"])
    processes = getattr(states_module, "PROCESS")
    for proc in processes["deploy"]:
        if len(proc["states"]) > len(result["states"]):
            result["states"] = proc["states"]
    # Index the user's schedule entries (except 'configuring') by job name
    db = open_session()
    entries = db.query(Schedule).filter(
        Schedule.owner == arg_dict["user"]).filter(
            Schedule.state != "configuring").all()
    db_jobs = {}
    for entry in entries:
        db_jobs[entry.node_name] = entry
    if len(db_jobs) == 0:
        close_session(db)
        return json.dumps(result)
    # Retrieve the running and waiting jobs from the grid5000 API
    g5k_site = g5k_connect(arg_dict)
    g5k_user = arg_dict["g5k_user"]
    user_jobs = g5k_site.jobs.list(state="running", user=g5k_user)
    user_jobs += g5k_site.jobs.list(state="waiting", user=g5k_user)
    check_deleted_jobs(db_jobs, user_jobs, db)
    for j in user_jobs:
        j.refresh()
        uid_str = str(j.uid)
        if uid_str not in db_jobs:
            # Job unknown in the schedule table
            continue
        my_conf = db_jobs[uid_str]
        node_info = {
            "node_name": uid_str,
            "bin": my_conf.bin,
            "start_date": my_conf.start_date,
            "end_date": my_conf.end_date,
            "state": my_conf.state,
            "job_state": j.state
        }
        result["nodes"][uid_str] = node_info
        stored_nodes = db.query(ActionProperty).filter(
            ActionProperty.node_name == my_conf.node_name).filter(
                ActionProperty.prop_name == "assigned_nodes").first()
        if stored_nodes is not None:
            # The assigned nodes are already recorded in the database
            node_info["assigned_nodes"] = stored_nodes.prop_value
        elif len(j.assigned_nodes) > 0:
            # Record the nodes assigned to the job
            stored_nodes = ActionProperty()
            stored_nodes.owner = arg_dict["user"]
            stored_nodes.node_name = my_conf.node_name
            stored_nodes.prop_name = "assigned_nodes"
            stored_nodes.prop_value = ",".join(j.assigned_nodes)
            db.add(stored_nodes)
            node_info["assigned_nodes"] = stored_nodes.prop_value
    close_session(db)
    return json.dumps(result)
Exemplo n.º 9
0
def node_deploy(arg_dict):
    """Record the deployment configuration of the jobs and mark them 'ready'.

    Args:
        arg_dict: request parameters; 'user' must contain an '@' and 'nodes'
            must map job names to dictionaries holding 'node_bin',
            'environment' and optionally 'form_ssh_key' / 'account_ssh_key'.

    Returns:
        A JSON string. On invalid parameters, a 'parameters' usage message.
        Otherwise a dictionary keyed by job name holding the list of
        'missing' properties, {'state': 'ready'} on success, or an empty
        dictionary when the job is not in the schedule table.
    """
    # The usage message is needed by every validation step below, so it is
    # defined before the first check
    error_msg = {
        "parameters": {
            "user": "******",
            "nodes": {
                "node-3": {
                    "node_bin": "my_bin",
                    "environment": "my-env"
                }
            }
        }
    }
    # Check the parameters
    if "user" not in arg_dict or "@" not in arg_dict[
            "user"] or "nodes" not in arg_dict:
        return json.dumps(error_msg)
    # Check the nodes dictionnary
    node_prop = arg_dict["nodes"]
    if not isinstance(node_prop, dict) or any(
            not isinstance(val, dict) for val in node_prop.values()):
        return json.dumps(error_msg)
    result = {}
    # Add the properties to the job configuration
    for node_name, my_prop in node_prop.items():
        result[node_name] = {}
        # Both properties are mandatory and must not be empty
        missing = [
            name for name in ("node_bin", "environment")
            if name not in my_prop or len(my_prop[name]) == 0
        ]
        if missing:
            result[node_name]["missing"] = missing
            continue
        # Remove special characters and spaces from the node bin name
        node_bin = safe_string(my_prop["node_bin"]).replace(" ", "_")
        # Record the job configuration to the database
        db = open_session()
        my_job = db.query(Schedule).filter(
            Schedule.node_name == node_name).first()
        if my_job is None:
            logging.error("job %s not found in the Schedule DB table" %
                          node_name)
            # Release the session on this path too
            close_session(db)
            continue
        my_job.bin = node_bin
        my_job.state = "ready"
        env = ActionProperty()
        env.owner = arg_dict["user"]
        env.node_name = my_job.node_name
        env.prop_name = "environment"
        env.prop_value = my_prop["environment"]
        db.add(env)
        # The key typed in the form takes precedence over the account key
        key_value = None
        for key_name in ("form_ssh_key", "account_ssh_key"):
            if key_name in my_prop and len(my_prop[key_name]) > 0:
                key_value = my_prop[key_name]
                break
        if key_value is not None:
            ssh_key = ActionProperty()
            ssh_key.owner = arg_dict["user"]
            ssh_key.node_name = my_job.node_name
            ssh_key.prop_name = "ssh_key"
            ssh_key.prop_value = key_value
            db.add(ssh_key)
        close_session(db)
        result[node_name] = {"state": "ready"}
    return json.dumps(result)