Example #1
 def test_create_reservation_different_site(self):
     conf = {
         'resources': {
             'a': {
                 'controller': 1,
                 'compute': 1,
                 'network': 1,
             },
             'b': {
                 'compute': 10
             }
         },
         'provider': {
             'name': 'test',
             'vlans': {}
         }
     }
     api.get_cluster_site = mock.Mock()
     api.get_cluster_site.side_effect = ['mysite', 'myothersite']
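     # side_effect: successive calls to the mocked get_cluster_site return
     # 'mysite' for cluster 'a', then 'myothersite' for cluster 'b'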
     provider = G5k()
     jobs_specs = provider._create_reservation(conf)
     expected = [(OarSubmission("{cluster='a'}/nodes=3",
                                name='test'), 'mysite'),
                 (OarSubmission("{cluster='b'}/nodes=10",
                                name='test'), 'myothersite')]
     self.equalsJobSpecs(expected, jobs_specs)
Example #2
 def run_xp(self):
     """Iterate over the parameters and execute the bench"""
     while len(self.sweeper.get_remaining()) > 0:
         comb = self.sweeper.get_next()
         max_cores = get_host_attributes(comb['cluster'] + '-1')[
             'architecture']['smt_size'] * self.n_nodes
         if comb['n_core'] > max_cores:
             self.sweeper.skip(comb)
             continue
         logger.info('Processing new combination %s' % (comb,))
         site = get_cluster_site(comb['cluster'])
         jobs = oarsub([(OarSubmission(
             resources="{cluster='%s'}/nodes=%d" % (comb['cluster'],
                                                    self.n_nodes),
             job_type='allow_classic_ssh',
             walltime='0:10:00'), site)])
         if jobs[0][0]:
             try:
                 wait_oar_job_start(*jobs[0])
                 nodes = get_oar_job_nodes(*jobs[0])
                 bench_cmd = 'mpirun -H %s -n %i %s ~/NPB3.3-MPI/bin/lu.%s.%i' % (
                     ",".join([node.address for node in nodes]),
                     comb['n_core'],
                     get_mpi_opts(comb['cluster']),
                     comb['size'],
                     comb['n_core'])
                 lu_bench = SshProcess(bench_cmd, nodes[0])
                 lu_bench.stdout_handlers.append(self.result_dir + '/' + slugify(comb) + '.out')
                 lu_bench.run()
                 if lu_bench.ok:
                     logger.info("comb ok: %s" % (comb,))
                     self.sweeper.done(comb)
                     continue
             finally:
                 oardel(jobs)
         logger.info("comb NOT ok: %s" % (comb,))
         self.sweeper.cancel(comb)
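
A minimal sketch of how the sweeper driving this loop is typically built; the parameter values are hypothetical, and the ParamSweeper/sweep usage mirrors the run() example later in this listing:

from execo_engine import ParamSweeper, sweep

# hypothetical parameter space for the LU bench above
parameters = {
    'cluster': ['parapluie'],   # made-up cluster name
    'n_core': [1, 2, 4, 8],
    'size': ['A', 'B', 'C'],
}
# persist progress under ./sweeps and iterate over every combination
sweeper = ParamSweeper('sweeps', sweep(parameters))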
Example #3
 def test_create_reservation_vlan_different_site(self):
     conf = {
         'resources': {
             'a': {
                 'controller': 1,
                 'compute': 1,
                 'network': 1,
             }
         },
         'provider': {
             'name': 'test',
             'vlans': {
                 'myothersite': "{type='kavlan'}/vlan=1"
             },
         }
     }
     api.get_cluster_site = mock.Mock(return_value='mysite')
     provider = G5k()
     jobs_specs = provider._create_reservation(conf)
     expected = [(OarSubmission("{cluster='a'}/nodes=3",
                                name='test'), 'mysite'),
                 (OarSubmission("{type='kavlan'}/vlan=1",
                                name='test'), 'myothersite')]
     self.equalsJobSpecs(expected, jobs_specs)
Example #4
def get_cpu_topology(cluster, xpdir=None):
    """ """
    logger.info('Determining the architecture of cluster ' + \
                style.emph(cluster))
    root = None
    # Trying to reed topology from a directory
    if xpdir:
        fname = xpdir + '/topo_' + cluster + '.xml'
        try:
            tree = parse(fname)
            root = tree.getroot()
        except Exception:
            logger.info('No cache file found, will reserve a node and '
                        'determine topology from virsh capabilities')

    if root is None:
        frontend = get_cluster_site(cluster)
        submission = OarSubmission(resources="{cluster='" + cluster +
                                   "'}/nodes=1",
                                   walltime="0:02:00",
                                   job_type="allow_classic_ssh")
        ((job_id, _), ) = oarsub([(submission, frontend)])
        wait_oar_job_start(job_id, frontend)
        host = get_oar_job_nodes(job_id, frontend)[0]
        capa = SshProcess('virsh capabilities',
                          host,
                          connection_params={
                              'user':
                              default_frontend_connection_params['user']
                          }).run()
        oardel([(job_id, frontend)])
        root = fromstring(capa.stdout)
        if xpdir:
            tree = ElementTree(root)
            tree.write(fname)

    cpu_topology = []
    i_cell = 0
    for cell in root.findall('.//cell'):
        cpu_topology.append([])
        for cpu in cell.findall('.//cpu'):
            cpu_topology[i_cell].append(int(cpu.attrib['id']))
        i_cell += 1
    logger.info(pformat(cpu_topology))
    return cpu_topology
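
A self-contained sketch of the parsing loop above, run on an abridged, hypothetical fragment of the XML returned by 'virsh capabilities' (two NUMA cells with two CPUs each):

from xml.etree.ElementTree import fromstring

sample = ("<capabilities><host><topology><cells num='2'>"
          "<cell id='0'><cpus num='2'><cpu id='0'/><cpu id='2'/></cpus></cell>"
          "<cell id='1'><cpus num='2'><cpu id='1'/><cpu id='3'/></cpus></cell>"
          "</cells></topology></host></capabilities>")

root = fromstring(sample)
cpu_topology = [[int(cpu.attrib['id']) for cpu in cell.findall('.//cpu')]
                for cell in root.findall('.//cell')]
print(cpu_topology)  # [[0, 2], [1, 3]]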
Example #5
 def prepare_bench(self):
     """bench configuration and compilation, copy binaries to frontends
     
     return True if preparation is ok
     """
     logger.info("preparation: configure and compile benchmark")
     # the involved sites. We will do the compilation on the first of these.
     sites = list(set(map(get_cluster_site, self.parameters['cluster'])))
     # generate the bench compilation configuration
     bench_list = '\n'.join([ 'lu\t%s\t%s' % (size, n_core)
                              for n_core in self.parameters['n_core']
                              for size in self.parameters['size'] ])
     # Reserving a node because compiling on the frontend is forbidden
     # and because we need mpif77
     jobs = oarsub([(OarSubmission(resources="nodes=1",
                                   job_type='allow_classic_ssh',
                                   walltime='0:10:00'), sites[0])])
     if jobs[0][0]:
         try:
             logger.info("copying bench archive to %s" % (sites[0],))
             copy_bench = Put([sites[0]], ['NPB3.3-MPI.tar.bz2']).run()
             logger.info("extracting bench archive on %s" % (sites[0],))
             extract_bench = Remote('tar -xjf NPB3.3-MPI.tar.bz2', [sites[0]]).run()
             logger.info("waiting job start %s" % (jobs[0],))
             wait_oar_job_start(*jobs[0], prediction_callback = pred_cb)
             logger.info("getting nodes of %s" % (jobs[0],))
             nodes = get_oar_job_nodes(*jobs[0])
             logger.info("configure bench compilation")
             conf_bench = Remote('echo "%s" > ~/NPB3.3-MPI/config/suite.def' % bench_list, nodes).run()
             logger.info("compil bench")
             compilation = Remote('cd NPB3.3-MPI && make clean && make suite', nodes).run()
             logger.info("compil finished")
         except Exception:
             logger.error("unable to compile bench")
             return False
         finally:
             oardel(jobs)
     else:
         return False
     # Copying binaries to all other frontends
     frontends = sites[1:]
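     # execo '{{expr}}' substitution (assumed semantics): the expression is
     # evaluated in the caller's frame and must yield one value per host, so
     # each copy of the compile node rsyncs to a different frontend.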
     rsync = Remote('rsync -avuP ~/NPB3.3-MPI/ {{frontends}}:NPB3.3-MPI', 
                    [get_host_site(nodes[0])] * len(frontends)) 
     rsync.run()
     return compilation.ok and rsync.ok
Example #6
def get_hosts_jobs(hosts, walltime, out_of_chart=False):
    """Find the first slot when the hosts are available and return a
     list of jobs_specs

    :param hosts: list of hosts

    :param walltime: duration of reservation
    """
    hosts = map(lambda x: x.address if isinstance(x, Host) else x, hosts)
    planning = get_planning(elements=hosts, out_of_chart=out_of_chart)
    limits = _slots_limits(planning)
    walltime = get_seconds(walltime)
    for limit in limits:
        all_host_free = True
        for site_planning in planning.itervalues():
            for cluster, cluster_planning in site_planning.iteritems():
                if cluster in get_g5k_clusters():
                    for host_planning in cluster_planning.itervalues():
                        host_free = False
                        for free_slot in host_planning['free']:
                            if (free_slot[0] <= limit and
                                    free_slot[1] >= limit + walltime):
                                host_free = True
                        if not host_free:
                            all_host_free = False
        if all_host_free:
            startdate = limit
            break
    else:
        logger.error('Unable to find a slot for %s', hosts)
        return None

    jobs_specs = []
    for site in planning.keys():
        site_hosts = map(get_host_longname,
                         filter(lambda h: get_host_site(h) == site, hosts))
        sub_res = "{host in ('" + "','".join(site_hosts) + "')}/nodes=" + str(
            len(site_hosts))
        jobs_specs.append((OarSubmission(resources=sub_res,
                                         reservation_date=startdate), site))

    return jobs_specs
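
The per-site resource string built above has this shape (host names are hypothetical):

site_hosts = ['graphene-10.nancy.grid5000.fr',
              'graphene-11.nancy.grid5000.fr']
sub_res = "{host in ('" + "','".join(site_hosts) + "')}/nodes=" + str(
    len(site_hosts))
print(sub_res)
# {host in ('graphene-10.nancy.grid5000.fr','graphene-11.nancy.grid5000.fr')}/nodes=2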
Example #7
 def get_nodes(self, comb):
     """
         Perform a submission for a given comb and 
         retrieve the submission node list
     """
     logger.info('Performing submission')
     n_core = get_host_attributes(comb['cluster'] +
                                  '-1')['architecture']['smt_size']
     submission = OarSubmission(
         resources="nodes=%d" % (max(1, comb['cores'] / n_core), ),
         sql_properties="cluster='%s'" % comb['cluster'],
         job_type="besteffort",
         name="l2c_fft_eval")
     self.oar_job_id, self.frontend = oarsub([
         (submission, get_cluster_site(comb['cluster']))
     ])[0]
     logger.info("Waiting for job start")
     wait_oar_job_start(self.oar_job_id, self.frontend)
     logger.info("Retrieving hosts list")
     nodes = get_oar_job_nodes(self.oar_job_id, self.frontend)
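     # duplicate each host once per core so that one process (e.g. one
     # MPI rank) can later be mapped onto every core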
     self.hosts = [host for host in nodes for i in range(n_core)]
Example #8
File: g5k.py Project: ivotron/enos
    def _create_reservation(self, conf):
        """Create the OAR Job specs."""
        provider_conf = conf['provider']
        criteria = {}
        # NOTE(msimonin): Traverse all cluster demands in alphabetical order;
        # test_create_reservation_different_site relies on this order
        for cluster, roles in sorted(conf["resources"].items(),
                                     key=lambda x: x[0]):
            site = api.get_cluster_site(cluster)
            nb_nodes = reduce(operator.add, map(int, roles.values()))
            criterion = "{cluster='%s'}/nodes=%s" % (cluster, nb_nodes)
            criteria.setdefault(site, []).append(criterion)

        for site, vlan in provider_conf["vlans"].items():
            criteria.setdefault(site, []).append(vlan)

        # Compute the specification for the reservation
        jobs_specs = [(OarSubmission(resources='+'.join(c),
                                     name=provider_conf["name"]), s)
                      for s, c in criteria.items()]
        logging.info("Criteria for the reservation: %s" % pf(jobs_specs))
        return jobs_specs
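
Plugging in the values exercised by Example #1, the criteria-to-jobs_specs step above reduces to this sketch (sites are the mocked ones; the import is assumed):

from execo_g5k import OarSubmission

criteria = {'mysite': ["{cluster='a'}/nodes=3"],
            'myothersite': ["{cluster='b'}/nodes=10"]}
jobs_specs = [(OarSubmission(resources='+'.join(c), name='test'), s)
              for s, c in criteria.items()]
# -> [(OarSubmission("{cluster='a'}/nodes=3", name='test'), 'mysite'),
#     (OarSubmission("{cluster='b'}/nodes=10", name='test'), 'myothersite')]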
Example #9
    def _make_reservation(self):
        """Make a new reservation."""

        # Extract the list of criteria (ie, `oarsub -l
        # *criteria*`) in order to compute a specification for the
        # reservation.
        criteria = {}
        # Actual criteria are:
        # - number of nodes per site
        for cluster, roles in self.config["resources"].items():
            site = get_cluster_site(cluster)
            nb_nodes = reduce(operator.add, map(int, roles.values()))
            criterion = "{cluster='%s'}/nodes=%s" % (cluster, nb_nodes)
            criteria.setdefault(site, []).append(criterion)

        for site, vlan in self.config["vlans"].items():
            criteria.setdefault(site, []).append(vlan)

        # Compute the specification for the reservation
        jobs_specs = [(OarSubmission(resources='+'.join(c),
                                     name=self.config["name"]), s)
                      for s, c in criteria.items()]
        logger.info("Criteria for the reservation: %s" % pf(jobs_specs))

        # Make the reservation
        gridjob, _ = EX5.oargridsub(
            jobs_specs,
            reservation_date=self.config['reservation'],
            walltime=self.config['walltime'].encode('ascii', 'ignore'),
            job_type='deploy'
        )

        # TODO: move this higher up to avoid a side effect here
        if gridjob is not None:
            self.gridjob = gridjob
            logger.info("Using new oargrid job %s" % style.emph(self.gridjob))
        else:
            logger.error("No oar job was created.")
            sys.exit(26)
Example #10
                logger.error("Unable to deploy & compile Bench.")
                oardel(jobs)
                return False
        logger.info("Benchs completed. Deleting jobs.")
        oardel(jobs)
        return True

if __name__ == "__main__":
    args = sys.argv[1:]
    prop = args[0]
    site = args[1]
    folder = args[2]
    
    if len(args) >= 5:
        date = args[3]
        walltime = args[4]
        submission = OarSubmission(resources="nodes=1",
                                   job_type='deploy',
                                   walltime=walltime,
                                   reservation_date=date,
                                   sql_properties=prop)
        launch_bench(submission, site, folder)
    else:
        walltime = args[3]
        submission = OarSubmission(resources="nodes=1",
                                   job_type='deploy',
                                   walltime=walltime,
                                   sql_properties=prop)
        launch_bench(submission, site, folder)

Example #11
    def run(self):
        sweeper = self.create_paramsweeper()

        while True:
            comb = sweeper.get_next()
            if not comb:
                break
            comb_dir = self.result_dir + '/' + slugify(comb)
            if not os.path.isdir(comb_dir):
                os.mkdir(comb_dir)
            comb_file = comb_dir + '/trace'
            g5k_configuration['kadeploy3'] = comb['version']
            logger.info('Treating combination %s', pformat(comb))
            get_version = SshProcess(
                comb['version'] + ' -v',
                comb['site'],
                connection_params=default_frontend_connection_params).run()
            logger.info(get_version.stdout)

            resources = ""
            if comb['kavlan']:
                resources += "{type='kavlan'}/vlan=1+"
            resources += "nodes=" + str(comb['n_nodes'])
            sub = OarSubmission(resources=resources,
                                job_type='deploy',
                                walltime="0:30:00",
                                name='Kadeploy_Tests')
            logger.info('Performing submission of %s on site %s', resources,
                        comb['site'])
            jobs = oarsub([(sub, comb['site'])])

            if jobs[0][0]:
                try:
                    logger.info('Waiting for job to start')
                    wait_oar_job_start(jobs[0][0], jobs[0][1])
                    hosts = get_oar_job_nodes(jobs[0][0], jobs[0][1])
                    logger.info('Deployment of %s',
                                ' '.join([host.address for host in hosts]))
                    kavlan = get_oar_job_kavlan(jobs[0][0], jobs[0][1])
                    if kavlan:
                        logger.info('In kavlan %s', kavlan)
                    deployment = Deployment(hosts,
                                            env_name=comb['env'],
                                            vlan=kavlan)
                    deployed, undeployed = deploy(deployment,
                                                  stdout_handlers=[comb_file],
                                                  stderr_handlers=[comb_file])

                finally:
                    logger.info('Destroying job %s on %s', str(jobs[0][0]),
                                jobs[0][1])
                    oardel([(jobs[0][0], jobs[0][1])])
            else:
                deployed, undeployed = [], []

            if len(deployed) == 0:
                logger.error('%s is KO', slugify(comb))
            elif len(undeployed) == 0:
                logger.info('%s is OK', slugify(comb))
            else:
                logger.warning('%s encountered problems with some hosts',
                               slugify(comb))

            sweeper.done(comb)
Example #12
def get_jobs_specs(resources, excluded_elements=None, name=None):
    """ Generate the several job specifications from the dict of resources and
    the blacklisted elements

    :param resources: a dict, whose keys are Grid'5000 element and values the
      corresponding number of n_nodes

    :param excluded_elements: a list of elements that won't be used

    :param name: the name of the jobs that will be given
    """
    jobs_specs = []
    if excluded_elements is None:
        excluded_elements = []

    # Creating the list of sites used
    sites = []
    real_resources = resources.copy()
    for resource in resources:
        if resource in get_g5k_sites() and resource not in sites:
            sites.append(resource)
        if resource in get_g5k_clusters(queues=None):
            if resource not in excluded_elements:
                site = get_cluster_site(resource)
                if site not in sites:
                    sites.append(site)
                if site not in real_resources:
                    real_resources[site] = 0

    # Checking if we need a Kavlan, a KaVLAN global or none
    get_kavlan = 'kavlan' in resources
    if get_kavlan:
        kavlan = 'kavlan'
        n_sites = 0
        for resource in real_resources:
            if resource in sites:
                n_sites += 1
            if n_sites > 1:
                kavlan += '-global'
                break

    blacklisted_hosts = {}
    for element in excluded_elements:
        if element not in get_g5k_clusters(queues=None) + get_g5k_sites():
            site = get_host_site(element)
            if site not in blacklisted_hosts:
                blacklisted_hosts[site] = [element]
            else:
                blacklisted_hosts[site].append(element)

    for site in sites:
        sub_resources = ''
        # Adding a KaVLAN if needed
        if get_kavlan:
            if 'global' not in kavlan:
                sub_resources = "{type='" + kavlan + "'}/vlan=1+"
                get_kavlan = False
            elif site in resources['kavlan']:
                sub_resources = "{type='" + kavlan + "'}/vlan=1+"
                get_kavlan = False

        base_sql = '{'
        end_sql = '}/'

        # Creating blacklist SQL string for hosts
        host_blacklist = False
        str_hosts = ''
        if site in blacklisted_hosts and len(blacklisted_hosts[site]) > 0:
            str_hosts = ''.join([
                "host not in ('" + get_host_longname(host) + "') and "
                for host in blacklisted_hosts[site]
            ])
            host_blacklist = True

        # Adding the clusters blacklist
        str_clusters = str_hosts if host_blacklist else ''
        cl_blacklist = False
        clusters_nodes = 0
        for cluster in get_site_clusters(site, queues=None):
            if cluster in resources and resources[cluster] > 0:
                if str_hosts == '':
                    sub_resources += "{cluster='" + cluster + "'}"
                else:
                    sub_resources += base_sql + str_hosts + "cluster='" + \
                        cluster + "'" + end_sql
                sub_resources += "/nodes=" + str(resources[cluster]) + '+'
                clusters_nodes += resources[cluster]
            if cluster in excluded_elements:
                str_clusters += "cluster not in ('" + cluster + "') and "
                cl_blacklist = True

        # Generating the site blacklist string from host and cluster blacklist
        str_site = ''
        if host_blacklist or cl_blacklist:
            str_site += base_sql
            if not cl_blacklist:
                str_site += str_hosts[:-4]
            else:
                str_site += str_clusters[:-4]
            str_site = str_site + end_sql

        if real_resources[site] > 0:
            sub_resources += str_site + "nodes=" + str(real_resources[site]) +\
                '+'

        if sub_resources != '':
            jobs_specs.append((OarSubmission(resources=sub_resources[:-1],
                                             name=name), site))

    return jobs_specs
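
A hedged usage sketch for get_jobs_specs; cluster, site, and host names are made up, and the expected string follows the construction in the code:

# two nodes on cluster 'griffon' plus a site-local kavlan on 'nancy',
# with one hypothetical host blacklisted
jobs_specs = get_jobs_specs({'griffon': 2, 'kavlan': ['nancy']},
                            excluded_elements=['griffon-1.nancy.grid5000.fr'],
                            name='my_xp')
# -> [(OarSubmission("{type='kavlan'}/vlan=1+"
#                    "{host not in ('griffon-1.nancy.grid5000.fr') "
#                    "and cluster='griffon'}/nodes=2", name='my_xp'), 'nancy')]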
Example #13
    def run(self):
        """Run the experiment"""
        already_configured = self.options.already_configured
        reservation_job_id = int(self.options.reservation_id) \
            if self.options.reservation_id is not None else None
        is_a_test = self.options.is_a_test

        if is_a_test:
            logger.warn('THIS IS A TEST! This run will use only a few '
                        'resources')

        # make the result folder writable for all
        os.chmod(self.result_dir, 0o777)
        # Import configuration
        with open(self.args[0]) as config_file:
            config = json.load(config_file)
        # backup configuration
        copy(self.args[0], self.result_dir)

        site = config["grid5000_site"]
        resources = config["resources"]
        nb_experiment_nodes = config["nb_experiment_nodes"]
        walltime = str(config["walltime"])
        env_name = config["kadeploy_env_name"]
        workloads = config["workloads"]
        # check that the workload files exist (assuming the same NFS mount
        # point is present on the remote and the local environment)
        for workload_file in workloads:
            with open(workload_file):
                pass
            # copy the workloads files to the results dir
            copy(workload_file, self.result_dir)

        # define the workloads parameters
        self.parameters = {'workload_filename': workloads}
        logger.info('Workloads: {}'.format(workloads))

        # define the iterator over the parameters combinations
        self.sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"),
                                    sweep(self.parameters))

        # Combinations may already be marked as skipped by a previous run
        # (when resuming with -c result_dir)
        logger.info('Skipped parameters:' +
                    '{}'.format(str(self.sweeper.get_skipped())))

        logger.info('Number of parameters combinations {}'.format(
            str(len(self.sweeper.get_remaining()))))
        logger.info('combinations {}'.format(str(
            self.sweeper.get_remaining())))

        if reservation_job_id is not None:
            jobs = [(reservation_job_id, site)]
        else:
            jobs = oarsub([(OarSubmission(resources=resources,
                                          job_type='deploy',
                                          walltime=walltime), site)])
        job_id, site = jobs[0]
        if job_id:
            try:
                logger.info("waiting job start %s on %s" % (job_id, site))
                wait_oar_job_start(job_id,
                                   site,
                                   prediction_callback=prediction_callback)
                logger.info("getting nodes of %s on %s" % (job_id, site))
                nodes = get_oar_job_nodes(job_id, site)
                # sort the nodes
                nodes = sorted(nodes, key=lambda node: node.address)
                # get only the necessary nodes under the switch
                if nb_experiment_nodes > len(nodes):
                    raise RuntimeError('The number of nodes in the '
                                       'reservation ({}) does not match the '
                                       'requested resources '
                                       '({})'.format(len(nodes),
                                                     nb_experiment_nodes))
                nodes = nodes[:nb_experiment_nodes]
                logger.info("deploying nodes: {}".format(str(nodes)))
                deployed, undeployed = deploy(
                    Deployment(nodes, env_name=env_name),
                    check_deployed_command=already_configured)
                if undeployed:
                    logger.warn("NOT deployed nodes: {}".format(
                        str(undeployed)))
                    raise RuntimeError('Deployment failed')

                if not already_configured:

                    # install OAR
                    install_cmd = "apt-get update; apt-get install -y "
                    node_packages = "oar-node"
                    logger.info("installing OAR nodes: {}".format(
                        str(nodes[1:])))
                    install_oar_nodes = Remote(
                        install_cmd + node_packages,
                        nodes[1:],
                        connection_params={'user': '******'})
                    install_oar_nodes.start()

                    server_packages = (
                        "oar-server oar-server-pgsql oar-user "
                        "oar-user-pgsql postgresql python3-pip "
                        "libjson-perl postgresql-server-dev-all")
                    install_oar_sched_cmd = """
                    mkdir -p /opt/oar_sched; \
                    cd /opt/oar_sched; \
                    git clone https://github.com/oar-team/oar3.git; \
                    cd oar3; \
                    git checkout dce942bebc2; \
                    pip3 install -e .; \
                    cd /usr/lib/oar/schedulers; \
                    ln -s /usr/local/bin/kamelot; \
                    pip3 install psycopg2
                    """
                    logger.info("installing OAR server node: {}".format(
                        str(nodes[0])))
                    install_master = SshProcess(
                        install_cmd + server_packages + ";" +
                        install_oar_sched_cmd,
                        nodes[0],
                        connection_params={'user': '******'})
                    install_master.run()
                    install_oar_nodes.wait()

                    if not install_master.ok:
                        Report(install_master)

                    configure_oar_cmd = """
                    sed -i \
                        -e 's/^\(DB_TYPE\)=.*/\\1="Pg"/' \
                        -e 's/^\(DB_HOSTNAME\)=.*/\\1="localhost"/' \
                        -e 's/^\(DB_PORT\)=.*/\\1="5432"/' \
                        -e 's/^\(DB_BASE_PASSWD\)=.*/\\1="oar"/' \
                        -e 's/^\(DB_BASE_LOGIN\)=.*/\\1="oar"/' \
                        -e 's/^\(DB_BASE_PASSWD_RO\)=.*/\\1="oar_ro"/' \
                        -e 's/^\(DB_BASE_LOGIN_RO\)=.*/\\1="oar_ro"/' \
                        -e 's/^\(SERVER_HOSTNAME\)=.*/\\1="localhost"/' \
                        -e 's/^\(SERVER_PORT\)=.*/\\1="16666"/' \
                        -e 's/^\(LOG_LEVEL\)\=\"2\"/\\1\=\"3\"/' \
                        -e 's#^\(LOG_FILE\)\=.*#\\1="{result_dir}/oar.log"#' \
                        -e 's/^\(JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD\=\"cpuset\".*\)/#\\1/' \
                        -e 's/^#\(CPUSET_PATH\=\"\/oar\".*\)/\\1/' \
                        -e 's/^\(FINAUD_FREQUENCY\)\=.*/\\1="0"/' \
                        /etc/oar/oar.conf
                    """.format(result_dir=self.result_dir)
                    configure_oar = Remote(configure_oar_cmd,
                                           nodes,
                                           connection_params={'user': '******'})
                    configure_oar.run()
                    logger.info("OAR is configured on all nodes")

                    # Configure server
                    create_db = "oar-database --create --db-is-local"
                    config_oar_sched = (
                        "oarnotify --remove-queue default;"
                        "oarnotify --add-queue default,1,kamelot")
                    start_oar = "systemctl start oar-server.service"
                    logger.info("configuring OAR database: {}".format(
                        str(nodes[0])))
                    config_master = SshProcess(
                        create_db + ";" + config_oar_sched + ";" + start_oar,
                        nodes[0],
                        connection_params={'user': '******'})
                    config_master.run()

                    # propagate SSH keys
                    logger.info("configuring OAR SSH")
                    oar_key = "/tmp/.ssh"
                    Process('rm -rf ' + oar_key).run()
                    Process(
                        'scp -o BatchMode=yes -o PasswordAuthentication=no '
                        '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null '
                        '-o ConnectTimeout=20 -rp -o User=root ' +
                        nodes[0].address + ":/var/lib/oar/.ssh"
                        ' ' + oar_key).run()
                    # Get(nodes[0], "/var/lib/oar/.ssh", [oar_key], connection_params={'user': '******'}).run()
                    Put(nodes[1:], [oar_key],
                        "/var/lib/oar/",
                        connection_params={
                            'user': '******'
                        }).run()
                    add_resources_cmd = """
                    oarproperty -a cpu || true; \
                    oarproperty -a core || true; \
                    oarproperty -c -a host || true; \
                    oarproperty -a mem || true; \
                    """
                    for node in nodes[1:]:
                        add_resources_cmd = add_resources_cmd + "oarnodesetting -a -h {node} -p host={node} -p cpu=1 -p core=4 -p cpuset=0 -p mem=16; \\\n".format(
                            node=node.address)

                    add_resources = SshProcess(
                        add_resources_cmd,
                        nodes[0],
                        connection_params={'user': '******'})
                    add_resources.run()

                    if add_resources.ok:
                        logger.info("oar is now configured!")
                    else:
                        raise RuntimeError(
                            "error in the OAR configuration: Abort!")

                # TODO: back up the OAR configuration

                # Do the replay
                logger.info('beginning the replay')
                while len(self.sweeper.get_remaining()) > 0:
                    combi = self.sweeper.get_next()
                    workload_file = os.path.basename(
                        combi['workload_filename'])
                    oar_replay = SshProcess(
                        script_path + "/oar_replay.py " +
                        combi['workload_filename'] + " " + self.result_dir +
                        "  oar_gant_" + workload_file, nodes[0])
                    oar_replay.stdout_handlers.append(self.result_dir + '/' +
                                                      workload_file + '.out')
                    logger.info("replaying workload: {}".format(combi))
                    oar_replay.run()
                    if oar_replay.ok:
                        logger.info("Replay workload OK: {}".format(combi))
                        self.sweeper.done(combi)
                    else:
                        logger.info("Replay workload NOT OK: {}".format(combi))
                        self.sweeper.cancel(combi)
                        raise RuntimeError("error in the OAR replay: Abort!")

            except:
                traceback.print_exc()
                ipdb.set_trace()

            finally:
                if is_a_test:
                    ipdb.set_trace()
                if reservation_job_id is None:
                    logger.info("delete job: {}".format(jobs))
                    oardel(jobs)