def test_environment(self):
    """check that environ= variables passed to a Service show up in its journal"""
    marker_foo = 'xxx-foo-xxx'
    marker_bar = 'xxx-bar-xxx'
    env = Variables()
    service = Service(
        "env", service_id='echo-environ',
        environ={
            'FOO': marker_foo,
            'BAR': marker_bar,
        })
    scheduler = Scheduler()
    SshJob(
        scheduler=scheduler,
        node=SshNode("localhost"),
        commands=[
            # start the service, then read its journal back
            Run(service.start_command()),
            Run(service.journal_command(since='5s ago'),
                capture=Capture('journal', env)),
        ])
    self.assertEqual(scheduler.run(), True)
    # both environment variables must appear in the captured journal
    for needle in (f"FOO={marker_foo}", f"BAR={marker_bar}"):
        self.assertTrue(needle in env.journal)
def test_deferred_service(self):
    """
    a service can be defined from a deferred instance
    rather than a plain string
    """
    scheduler = Scheduler()
    env = Variables()
    service = Service(Deferred("echo {{run1}}", env),
                      service_id='echo',
                      verbose=True)
    node = SshNode(localhostname(), username=localuser())
    # first job fills env.run1, second starts the service,
    # third captures its journal
    Sequence(
        SshJob(node, commands=Run("echo from-first-run",
                                  capture=Capture('run1', env))),
        SshJob(node, commands=Run(service.start_command())),
        SshJob(node, commands=Run(
            service.journal_command(since="10 second ago"),
            capture=Capture('journal', env))),
        scheduler=scheduler)
    print('STARTING', 20 * '-', service.start_command())
    scheduler.run()
    print('DONE', 20 * '-', service.start_command())
    # the string produced by the first job must show in the journal
    self.assertTrue("from-first-run" in env.journal)
def test_logic1(self):
    """one failing command inside a non-critical job makes the whole job fail"""
    failing_job = SshJob(
        node=self.gateway(),
        critical=False,
        label="should fail",
        commands=[Run("false"), Run("true")])
    self.run_one_job(failing_job, expected=False)
def test_deferred_chain(self):
    """
    one command computes a string that gets passed to another one

    this is analogous to
        run1=$(ssh localhost echo from-first-run)
        final=$(ssh localhost echo ${run1})

    the 'final' variable is only needed for checking
    everything went well
    """
    scheduler = Scheduler()
    env = Variables()
    node = SshNode(localhostname(), username=localuser())
    # produce env.run1 ...
    producer = SshJob(node, commands=Run("echo from-first-run",
                                         capture=Capture('run1', env)))
    # ... and consume it through a Deferred, capturing the result
    consumer = SshJob(node, commands=Run(Deferred("echo {{run1}}", env),
                                         capture=Capture('final', env)))
    Sequence(producer, consumer, scheduler=scheduler)
    scheduler.run()
    self.assertEqual(env.final, "from-first-run")
def _run_xterm_node_shell(self, node, shell):
    """
    Run an xterm with X11 forwarding on *node*.

    Parameters:
        node: the SshNode to run on
        shell: if True, launch xterm through a bash script (RunString);
               otherwise run the plain `xterm` command (Run)
    """
    if shell:
        xterm_command = RunString("""#!/bin/bash
xterm
""", x11=True)
    else:
        xterm_command = Run("xterm", x11=True)
    self.run_one_job(
        job=SshJob(node=node,
                   # use 'commands' (plural) for a list of commands,
                   # consistently with the other tests in this file
                   commands=[
                       Run("echo without x11, DISPLAY=$DISPLAY"),
                       Run("echo with x11, DISPLAY=$DISPLAY", x11=True),
                       xterm_command,
                   ]))
def _allowed_retcod(self, allowed_exits, host="localhost", username=None):
    """
    Run a command that exits with a fixed non-zero code, and check the
    scheduler deems the run OK exactly when that code is in allowed_exits.
    """
    print(f"Testing allowed retcod allowed_exits={allowed_exits}")
    # global timeout
    total = 4
    # scheduled duration
    long = 1
    # we always exit code 100
    # NOTE: must stay below 256 - shell exit statuses are taken modulo
    # 256, so the previous value of 1000 would have reached the caller
    # as 232 and could never have matched allowed_exits
    retcod = 100
    if username is None:
        username = util.localuser()
    node = SshNode(host, username=username)
    scheduler = Scheduler(timeout=total, critical=False)
    SshJob(node=node, scheduler=scheduler,
           command=Run(f"sleep {long}; exit {retcod}",
                       allowed_exits=allowed_exits))
    # the run succeeds iff the exit code was explicitly allowed
    expected = retcod in allowed_exits

    run = scheduler.run()
    scheduler.list()
    self.assertEqual(run, expected)
def test_p3(self):
    """a list of commands passed through the plural 'commands' keyword"""
    job = SshJob(
        node=self.gateway(),
        label='p3',
        commands=[
            Run("echo", "SshJob p3 commands plural", "$(hostname)")
        ])
    self.run_one_job(job)
def check_lease(experiment_scheduler, sshnode):
    """
    re-usable function that acts a bit like a python
    decorator on schedulers.

    Given an experiment described as a scheduler, this function
    returns a higher-level scheduler that first checks for the
    lease, and then proceeds with the experiment.
    """
    # the lease check runs on the gateway; being critical, any failure
    # here makes the returned scheduler bail out immediately
    lease_job = SshJob(
        node=faraday,
        critical=True,
        command=Run("rhubarbe leases --check"),
    )
    # the experiment scheduler is inserted as a nested scheduler,
    # i.e. as a regular job that runs after the lease check
    return Scheduler(Sequence(lease_job, experiment_scheduler))
def test_capture(self):
    """a CaptureFormatter records the output of remote commands"""
    formatter = CaptureFormatter()
    node = SshNode(localhostname(), username=localuser(),
                   formatter=formatter)
    scheduler = Scheduler()
    scheduler.add(
        SshJob(node=node,
               commands=[
                   Run("echo LINE1"),
                   Run("echo LINE2"),
               ]))
    formatter.start_capture()
    scheduler.run()
    # both lines must have been captured, in order
    self.assertEqual(formatter.get_capture(), "LINE1\nLINE2\n")
def all_off(slice, verbose, debug):
    """
    expects a slice name, and turns off faraday completely

    Returns 0 on success, 1 otherwise.
    """
    # what argparse knows as a slice actually is a gateway (user + host)
    try:
        gwuser, gwhost = slice.split('@')
    except ValueError:
        # no '@' in the slice name: use the default gateway host
        gwuser, gwhost = slice, "faraday.inria.fr"

    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=ColonFormatter(verbose=verbose),
                     debug=debug)

    scheduler = Scheduler(
        SshJob(
            node=gwnode,
            command=Run("rhubarbe", "bye"),
            label="turn off",
        ))

    result = scheduler.orchestrate()
    if not result:
        print("RUN KO : {}".format(scheduler.why()))
        # fixed: this used to read 'sched.debrief()' which raised a
        # NameError - 'sched' was never defined
        scheduler.debrief()
    else:
        print("faraday turned off OK")

    return 0 if result else 1
def _allowed_signal(self, allowed_exits, host="localhost", username=None):
    """
    Run a long 'sleep' that a second job kills with a signal, and check
    the run outcome matches whether that signal is in allowed_exits.
    """
    print(f"Testing allowed signal allowed_exits={allowed_exits}")
    # global timeout
    total = 4
    # scheduled duration
    long = 2
    # send signal after that amount
    short = 1
    # we always kill with TERM
    signal = "TERM"
    if username is None:
        username = util.localuser()
    node = SshNode(host, username=username)
    scheduler = Scheduler(timeout=total, critical=False)
    # the victim: a sleep that will get killed before it completes
    SshJob(node=node, scheduler=scheduler,
           command=Run(f"sleep {long}", allowed_exits=allowed_exits))
    # the killer: waits a bit, then TERMs the sleep
    SshJob(node=node, scheduler=scheduler,
           command=f"sleep {short}; pkill -{signal} sleep")
    expected = signal in allowed_exits

    outcome = scheduler.run()
    scheduler.list()
    self.assertEqual(outcome, expected)
def test_s3(self):
    """a list passed through the singular 'command' keyword"""
    job = SshJob(
        node=self.gateway(),
        label='s3',
        command=[
            Run("echo", "SshJob with s3 command singular", "$(hostname)")
        ])
    self.run_one_job(job)
def test_local_command2(self):
    """run a small shell loop on the local node"""
    job = SshJob(
        node=LocalNode(),
        commands=[
            Run("for i in $(seq 3); do echo line $i; sleep 1; done"),
        ])
    self.run_one_job(job=job)
def test_local_command(self):
    """exercise LocalNode with a few shell commands on a random file"""
    # create random file in python rather than with /dev/random
    # that is not working in virtualbox
    random_full = "RANDOM-full"
    random_head = "RANDOM"
    self.random_file(random_full, size=19)
    print("DONE")
    commands = [
        Run(f"head -c {2**18} < {random_full} > {random_head}"),
        Run(f"ls -l {random_head}"),
        Run(f"shasum {random_head}"),
    ]
    self.run_one_job(job=SshJob(node=LocalNode(), commands=commands))
def test_x11_shell(self):
    """X11 forwarding with both a Run and a RunString command"""
    job = SshJob(
        node=self.gateway(),
        command=[
            Run("echo DISPLAY=$DISPLAY", x11=True),
            RunString("""#!/bin/bash
xlsfonts | head -5
""", x11=True),
        ])
    self.run_one_job(job=job)
def all_off(slice, verbose, debug):
    """
    expects a slice name, and turns off faraday completely

    Returns 0 on success, 1 otherwise.
    """
    # what argparse knows as a slice actually is a gateway (user + host)
    try:
        gwuser, gwhost = slice.split('@')
    except ValueError:
        # no '@' in the slice name: use the default gateway host
        gwuser, gwhost = slice, "faraday.inria.fr"

    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=ColonFormatter(verbose=verbose),
                     debug=debug)

    # keep a handle on the lease-checking job so we can later ask
    # whether it raised an exception
    # fixed: 'check_for_lease' was referenced below but never defined,
    # as the job used to be created anonymously inside the Sequence
    check_for_lease = SshJob(
        node=gwnode,
        command=Run("rhubarbe", "leases", "--check"),
        label="check we have a current lease",
    )
    scheduler = Scheduler(
        Sequence(
            check_for_lease,
            SshJob(
                node=gwnode,
                command=Run("rhubarbe", "bye"),
                label="turn off",
            )))

    result = scheduler.orchestrate()
    if not result:
        if check_for_lease.raised_exception():
            print("slice {} does not appear to hold a valid lease"
                  .format(slice))
        else:
            print("RUN KO : {}".format(scheduler.why()))
            # fixed: this used to read 'sched.debrief()' - a NameError
            scheduler.debrief()
    else:
        print("faraday turned off OK")

    return 0 if result else 1
def check_expansion(self, *deferred_expected_s):
    """
    each positional argument is a (deferred, expected) pair; run every
    deferred command on localhost with its own capturing formatter,
    then compare each captured output against its expectation
    """
    scheduler = Scheduler()
    formatters = {}
    for deferred, _ in deferred_expected_s:
        formatter = CaptureFormatter()
        formatter.start_capture()
        formatters[deferred] = formatter
        node = SshNode(localhostname(), username=localuser(),
                       formatter=formatter)
        scheduler.add(SshJob(node=node, commands=Run(deferred)))
    scheduler.run()
    for deferred, expected in deferred_expected_s:
        self.assertEqual(formatters[deferred].get_capture(), expected)
def test_commands_verbose(self):
    """exercise verbose=True at both the job and the command level"""
    dummy_path = "tests/dummy-10"
    dummy_file = Path(dummy_path).name
    scheduler = Scheduler()
    # push a file to the remote end and pull it back...
    remote_job = SshJob(
        node=self.gateway(),
        verbose=True,
        commands=[
            Run("hostname"),
            RunScript("tests/script-with-args.sh", "arg1", "arg2"),
            RunString("for i in $(seq 3); do echo verbose$i; done"),
            Push(localpaths=dummy_path, remotepath="."),
            Pull(remotepaths=dummy_file, localpath=dummy_path + ".loop"),
        ])
    # ... then check locally that the round-trip preserved it
    check_job = SshJob(
        node=LocalNode(),
        critical=True,
        commands=Run("diff {x} {x}.loop".format(x=dummy_path),
                     verbose=True))
    Sequence(remote_job, check_job, scheduler=scheduler)
    ok = scheduler.run()
    ok or scheduler.debrief()
    self.assertTrue(ok)
def test_file_loopback(self, size=20):
    """
    push a random file, pull it back twice (in separate ssh sessions),
    and check both copies are identical to the original
    """
    # randomly create a 2**size chars file
    b1 = "random-{}".format(size)
    b2 = "loopback-{}".format(size)
    b3 = "again-{}".format(size)
    p1, p2, p3 = ("tests/" + b for b in (b1, b2, b3))
    self.random_file(p1, size)

    # push, then pull back as a different name
    self.run_one_job(
        SshJob(node=self.gateway(),
               commands=[
                   Run("mkdir -p apssh-tests"),
                   Push(localpaths=p1, remotepath="apssh-tests"),
                   Pull(remotepaths="apssh-tests/" + b1,
                        localpath="tests/" + b2),
               ]))

    with open(p1) as f1:
        s1 = f1.read()
    with open(p2) as f2:
        s2 = f2.read()
    self.assertEqual(s1, s2)

    # pull it again in another ssh connection
    self.run_one_job(
        SshJob(node=self.gateway(),
               commands=[
                   Run("mkdir -p apssh-tests"),
                   Pull(remotepaths="apssh-tests/" + b1,
                        localpath="tests/" + b3),
               ]))
    with open(p3) as f3:
        s3 = f3.read()
    self.assertEqual(s1, s3)
def test_mixed_commands(self):
    """interleave RunScript and Run commands inside a single job"""
    includes = ["tests/inclusion.sh"]
    commands = [
        RunScript("tests/needsinclude.sh", "run1", includes=includes),
        Run("echo +++++; cat /etc/lsb-release; echo +++++"),
        RunScript("tests/needsinclude.sh", "another", "run",
                  includes=includes),
    ]
    self.run_one_job(
        SshJob(node=self.gateway(),
               commands=commands,
               label='script_commands'))
def one_run(tx_power, phy_rate, antenna_mask, channel, *,
            run_name=default_run_name, slicename=default_slicename,
            load_images=False, node_ids=None,
            parallel=None,
            verbose_ssh=False, verbose_jobs=False, dry_run=False):
    """
    Performs data acquisition on all nodes with the following settings

    Arguments:
        tx_power: in dBm, a string like 5, 10 or 14
        phy_rate: a string among 1, 54
        antenna_mask: a string among 1, 3, 7
        channel: a string like e.g. 1 or 40
        run_name: the name for a subdirectory where all data will be kept
                  successive runs should use the same name for further
                  visualization
        slicename: the Unix login name (slice name) to enter the gateway
        load_images: a boolean specifying whether nodes should be
                     re-imaged first
        node_ids: a list of node ids to run the scenario on; strings or
                  ints are OK; defaults to the all 37 nodes i.e. the
                  whole testbed
        parallel: a number of simulataneous jobs to run
                  1 means all data acquisition is sequential (default)
                  0 means maximum parallel
    """
    #
    # dry-run mode
    # just display a one-liner with parameters
    #
    # NOTE(review): this branch iterates node_ids before the
    # default is applied below, so dry_run=True with node_ids=None
    # would raise TypeError - verify callers always pass node_ids
    if dry_run:
        load_msg = "" if not load_images else " LOAD"
        nodes = " ".join(str(n) for n in node_ids)
        print("dry-run: {run_name}{load_msg} -"
              " t{tx_power} r{phy_rate} a{antenna_mask} ch{channel} -"
              "nodes {nodes}"
              .format(**locals()))
        # in dry-run mode we are done
        return True

    # set default for the nodes parameter
    node_ids = [int(id) for id in node_ids] \
        if node_ids is not None else default_node_ids

    ###
    # create the logs directory based on input parameters
    run_root = naming_scheme(run_name, tx_power, phy_rate,
                             antenna_mask, channel, autocreate=True)

    # the nodes involved
    faraday = SshNode(hostname=default_gateway, username=slicename,
                      formatter=TimeColonFormatter(), verbose=verbose_ssh)

    # this is a python dictionary that allows to retrieve a node object
    # from an id
    node_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id),
                    username="******",
                    formatter=TimeColonFormatter(), verbose=verbose_ssh)
        for id in node_ids
    }

    # the global scheduler
    scheduler = Scheduler(verbose=verbose_jobs)

    ##########
    # a critical job: if the lease check fails, everything stops here
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        verbose=verbose_jobs,
        critical=True,
        command=Run("rhubarbe leases --check"),
    )

    # load images if requested
    green_light = check_lease
    if load_images:
        # the nodes that we **do not** use should be turned off
        # so if we have selected e.g. nodes 10 12 and 15, we will do
        # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15
        negated_node_ids = ["~{}".format(id) for id in node_ids]
        # replace green_light in this case
        green_light = SshJob(
            node=faraday,
            required=check_lease,
            critical=True,
            scheduler=scheduler,
            verbose=verbose_jobs,
            commands=[
                Run("rhubarbe", "off", "-a", *negated_node_ids),
                Run("rhubarbe", "load", "-i", "u16-ath-noreg", *node_ids),
                Run("rhubarbe", "wait", *node_ids)
            ]
        )

    ##########
    # setting up the wireless interface on all nodes
    #
    # this is a python feature known as a list comprehension
    # we just create as many SshJob instances as we have
    # (id, SshNode) couples in node_index
    # and gather them all in init_wireless_jobs
    # they all depend on green_light
    #
    # provide node-utilities with the ranges/units it expects
    frequency = channel_frequency[int(channel)]
    # tx_power_in_mBm not in dBm
    tx_power_driver = tx_power * 100

    if load_images:
        # The first init_wireless_jobs always has troubles...
        # Do it twice the first time (nasty hack)
        init_wireless_jobs = [
            SshJob(
                scheduler=scheduler,
                required=green_light,
                node=node,
                verbose=verbose_jobs,
                label="init {}".format(id),
                commands=[
                    RunScript("node-utilities.sh", "init-ad-hoc-network",
                              wireless_driver, "foobar", frequency,
                              phy_rate, antenna_mask, tx_power_driver),
                    RunScript("node-utilities.sh", "init-ad-hoc-network",
                              wireless_driver, "foobar", frequency,
                              phy_rate, antenna_mask, tx_power_driver)
                ]
            )
            for id, node in node_index.items()]
    else:
        init_wireless_jobs = [
            SshJob(
                scheduler=scheduler,
                required=green_light,
                node=node,
                verbose=verbose_jobs,
                label="init {}".format(id),
                command=RunScript("node-utilities.sh",
                                  "init-ad-hoc-network",
                                  wireless_driver, "foobar", frequency,
                                  phy_rate, antenna_mask, tx_power_driver)
            ) for id, node in node_index.items()]

    # then install and run olsr on fit nodes
    # NOTE(review): loop variable 'i' is unused here; 'required' is the
    # whole init_wireless_jobs list, which the scheduler accepts
    run_olsr = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=init_wireless_jobs,
            label="init and run olsr on fit nodes",
            verbose=verbose_jobs,
            command=RunScript("node-utilities.sh", "run-olsr")
        )
        for i, node in node_index.items()]

    # after that, run tcpdump on fit nodes, this job never ends...
    run_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=run_olsr,
            label="run tcpdump on fit nodes",
            verbose=verbose_jobs,
            commands=[
                Run("echo run tcpdump on fit{:02d}".format(i)),
                Run("tcpdump -U -i moni-{} -y ieee802_11_radio -w /tmp/fit{}.pcap"
                    .format(wireless_driver, i))
            ]
        )
        for i, node in node_index.items()]

    # let the wireless network settle
    settle_wireless_job = PrintJob(
        "Let the wireless network settle",
        sleep=settle_delay,
        scheduler=scheduler,
        required=run_olsr,
        label="settling")

    ##########
    # create all the ping jobs, i.e. max*(max-1)/2
    # this again is a python list comprehension
    # see the 2 for instructions at the bottom
    #
    # notice that these SshJob instances are not yet added
    # to the scheduler, we will add them later on
    # depending on the sequential/parallel strategy
    pings = [
        SshJob(
            node=nodei,
            required=settle_wireless_job,
            label="ping {} -> {}".format(i, j),
            verbose=verbose_jobs,
            commands=[
                Run("echo {} '->' {}".format(i, j)),
                RunScript("node-utilities.sh", "my-ping",
                          "10.0.0.{}".format(j), ping_timeout,
                          ping_interval, ping_size, ping_number,
                          ">", "PING-{:02d}-{:02d}".format(i, j)),
                Pull(remotepaths="PING-{:02d}-{:02d}".format(i, j),
                     localpath=str(run_root)),
            ]
        )
        # looping on the source, now only fit01 is source
        for i, nodei in node_index.items()
        # and on the destination
        for j, nodej in node_index.items()
        # and keep only half of the couples
        if (j > i) and (i==1)
    ]

    # retrieve all pcap files from fit nodes
    retrieve_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=nodei,
            required=pings,
            label="retrieve pcap trace from fit{:02d}".format(i),
            verbose=verbose_jobs,
            commands=[
                RunScript("node-utilities.sh", "kill-olsr"),
                Run("sleep 1;pkill tcpdump; sleep 1"),
                RunScript("node-utilities.sh", "process-pcap", i),
                Run("echo retrieving pcap trace and result-{i}.txt from fit{i:02d}"
                    .format(i=i)),
                Pull(remotepaths=["/tmp/fit{}.pcap".format(i),
                                  "/tmp/result-{}.txt".format(i)],
                     localpath=str(run_root)),
            ]
        )
        for i, nodei in node_index.items()
    ]

    # xxx this is a little fishy
    # should we not just consider that the default is parallel=1 ?
    if parallel is None:
        # with the sequential strategy, we just need to
        # create a Sequence out of the list of pings
        # Sequence will add the required relationships
        scheduler.add(Sequence(*pings, scheduler=scheduler))
        # for running sequentially we impose no limit on the scheduler
        # that will be limitied anyways by the very structure
        # of the required graph
        jobs_window = None
    else:
        # with the parallel strategy
        # we just need to insert all the ping jobs
        # as each already has its required OK
        scheduler.update(pings)
        # this time the value in parallel is the one
        # to use as the jobs_limit; if 0 then inch'allah
        jobs_window = parallel

    # if not in dry-run mode, let's proceed to the actual experiment
    ok = scheduler.orchestrate(jobs_window=jobs_window)
    # give details if it failed
    if not ok:
        scheduler.debrief()

    # data acquisition is done, let's aggregate results
    # i.e. compute averages
    if ok:
        post_processor = Aggregator(run_root, node_ids, antenna_mask)
        post_processor.run()

    return ok
hostname="fit02", username="******", verbose=verbose_ssh) ########## # create an orchestration scheduler scheduler = Scheduler() ########## check_lease = SshJob( # checking the lease is done on the gateway node=faraday, # this means that a failure in any of the commands # will cause the scheduler to bail out immediately critical=True, command=Run("rhubarbe leases --check"), scheduler=scheduler, ) #################### # This is our own brewed script for setting up a wifi network # it run on the remote machine - either sender or receiver # and is in charge of initializing a small ad-hoc network # # Thanks to the RunString class, we can just define this as # a python string, and pass it arguments from python variables # turn_on_wireless_script = """#!/bin/bash # we expect the following arguments
def test_graphics1(self):
    """
    Build a scheduler holding one job per supported command shape
    (plain string, Run, RunString with/without remote_name and args,
    RunScript, Push, Pull, and assorted label values), list it with
    and without details, and render it as a png.
    """
    # non-critical: some commands below are expected to fail
    scheduler = Scheduler(critical=False)
    gateway = SshNode(hostname=localhostname(), username=localuser())
    Sequence(
        # a simple command passed as a plain string
        SshJob(
            node=gateway,
            command='hostname',
        ),
        # a list under the singular 'command' keyword
        SshJob(node=gateway,
               command=[
                   Run('ls /etc/passwd'),
                   Run('wc -l /etc/passwd'),
               ]),
        # RunString: the script text is given inline
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo with RunString on $(hostname) at $(date)"),
               ]),
        SshJob(node=gateway,
               commands=[
                   RunScript("tests/testbasic.sh"),
               ]),
        # mixing the three command classes in one job;
        # remote_name controls the name of the copied script
        SshJob(node=gateway,
               commands=[
                   Run('wc -l /etc/passwd'),
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo with RunsString on $(hostname) at $(date)",
                       remote_name="show-host-date"),
                   RunScript("tests/testbasic.sh"),
               ]),
        # RunString with one positional script argument
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n",
                       10)
               ]),
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n",
                       10,
                       remote_name='short-show-args')
               ]),
        # RunString with several script arguments
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n"
                       "echo second arg is $2\n"
                       "echo third arg is $3\n"
                       "echo fourth arg is $4\n",
                       100, 200, 300, 400)
               ]),
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n"
                       "echo second arg is $2\n"
                       "echo third arg is $3\n"
                       "echo fourth arg is $4\n",
                       1000, 2000, 3000, 4000,
                       remote_name='long-show-args')
               ]),
        # same, with an explicit label for the graphics
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n"
                       "echo second arg is $2\n"
                       "echo third arg is $3\n"
                       "echo fourth arg is $4\n",
                       1000, 2000, 3000, 4000,
                       remote_name='long-show-args',
                       label='snip')
               ]),
        # exercise label= on every command class, including
        # deliberately odd values (None, [], 0, {}, "") to check
        # the rendering code copes with them
        SshJob(node=gateway,
               commands=[
                   Run("hostname", label="Run()"),
                   RunScript("foobar", label="RunScript()"),
                   RunString("foobar", label="RunString()"),
                   Push("foobar", remotepath="remote", label="Push()"),
                   Pull("remote", localpath="foobar", label="Pull()"),
                   Run("hostname", label=None),
                   RunScript("foobar", label=[]),
                   RunString("foobar", label=0),
                   Push("foobar", remotepath="remote", label={}),
                   Pull("remote", localpath="foobar", label=""),
               ]),
        scheduler=scheduler,
    )
    print("NO DETAILS")
    scheduler.list()
    print("WITH DETAILS")
    scheduler.list(details=True)
    produce_png(scheduler, "test_graphics1")
    ok = scheduler.run()
    # some commands above (the 'foobar' scripts) are bound to fail,
    # so with a non-critical scheduler run() must return False
    self.assertFalse(ok)
username="******", verbose=verbose_ssh) node2 = SshNode(gateway=faraday, hostname="fit02", username="******", verbose=verbose_ssh) nodes = (node1, node2) ########## create the scheduler instance upfront scheduler = Scheduler() check_lease = SshJob( node=faraday, critical=True, command=Run("rhubarbe leases --check"), scheduler=scheduler, ) ########## 1 step, generate a random data file of 1 M bytes create_random_job = SshJob( node=LocalNode(), commands=[ Run("head", "-c", random_size, "<", "/dev/random", ">", "RANDOM"), Run("ls", "-l", "RANDOM"), Run("shasum", "RANDOM"), ], required=check_lease, scheduler=scheduler, )
hostname="fit02", username="******", verbose=verbose_ssh) ########## # create an orchestration scheduler scheduler = Scheduler() ########## check_lease = SshJob( # checking the lease is done on the gateway node=faraday, # this means that a failure in any of the commands # will cause the scheduler to bail out immediately critical=True, command=Run("rhubarbe leases --check"), scheduler=scheduler, ) # the job to wait before proceeding ready_requirement = check_lease # has the user requested to load images ? if args.load: ready_requirement = SshJob( node=faraday, commands=[ Run('rhubarbe load -i ubuntu 1 2'), Run('rhubarbe wait 1 2'), ], required=check_lease, scheduler=scheduler,
def run(*, gateway, slicename,
        disaggregated_cn, operator_version, nodes, node_master,
        node_enb, quectel_nodes, phones, flexran, drone,
        verbose, dry_run,
        load_images, master_image, worker_image, quectel_image):
    """
    Install K8S on R2lab

    Arguments:
        slicename: the Unix login name (slice name) to enter the gateway
        quectel_nodes: list of indices of quectel UE nodes to use
        phones: list of indices of phones to use
        nodes: a list of node ids to run the scenario on; strings or
               ints are OK;
        node_master: the master node id, must be part of selected nodes
        node_enb: the node id for the enb, which is connected to
                  usrp/duplexer
        disaggregated_cn: Boolean; True for the disaggregated CN
                          scenario. False for all-in-one CN.
        operator_version: str, either "none" or "v1" or "v2".
    """
    # "none" means: stop after the kube5g operator is up
    if operator_version == "none":
        only_kube5g = True
    else:
        only_kube5g = False

    # sanity checks on the node selection
    if node_master not in nodes:
        print(f"master node {node_master} must be part of selected fit nodes {nodes}")
        exit(1)
    if node_enb not in nodes:
        print(f"eNB worker node {node_enb} must be part of selected fit nodes {nodes}")
        exit(1)

    # Check if the browser can be automatically run to display the Drone app
    if drone:
        run_browser = True
        if platform == "linux":
            cmd_open = "xdg-open"
        elif platform == "darwin":
            cmd_open = "open"
        else:
            run_browser = False
        if run_browser:
            print(f"**** Will run the browser with command {cmd_open}")
        else:
            print(f"**** Will not be able to run the browser as platform is {platform}")

    # workers = all selected nodes except the master
    worker_ids = nodes[:]
    worker_ids.remove(node_master)
    quectel_ids = quectel_nodes[:]
    quectel = len(quectel_ids) > 0

    # the gateway and the various ssh node handles
    faraday = SshNode(hostname=default_gateway, username=slicename,
                      verbose=verbose,
                      formatter=TimeColonFormatter())

    master = SshNode(gateway=faraday, hostname=fitname(node_master),
                     username="******",
                     verbose=verbose,
                     formatter=TimeColonFormatter())

    node_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id),
                    username="******", formatter=TimeColonFormatter(),
                    verbose=verbose)
        for id in nodes
    }
    nodes_quectel_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id),
                    username="******", formatter=TimeColonFormatter(),
                    verbose=verbose)
        for id in quectel_nodes
    }
    worker_index = dict(node_index)
    del worker_index[node_master]
    fit_master = fitname(node_master)
    fit_enb = fitname(node_enb)

    # the global scheduler
    scheduler = Scheduler(verbose=verbose)

    ##########
    # critical lease check: everything below depends on it
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        critical=True,
        verbose=verbose,
        command=Run("rhubarbe leases --check"),
    )

    green_light = check_lease

    if load_images:
        # NOTE(review): green_light becomes a *list* of jobs here;
        # later 'required=green_light' then depends on all three
        green_light = [
            SshJob(
                scheduler=scheduler,
                required=check_lease,
                node=faraday,
                critical=True,
                verbose=verbose,
                label=f"Load image {master_image} on master {fit_master}",
                commands=[
                    Run(f"rhubarbe load {node_master} -i {master_image}"),
                    Run(f"rhubarbe wait {node_master}"),
                ]
            ),
            SshJob(
                scheduler=scheduler,
                required=check_lease,
                node=faraday,
                critical=True,
                verbose=verbose,
                label=f"Load image {worker_image} on worker nodes",
                commands=[
                    # if usrp is on, load could be problematic...
                    Run(f"rhubarbe usrpoff {node_enb}"),
                    Run("rhubarbe", "load", *worker_ids, "-i", worker_image),
                    Run("rhubarbe", "wait", *worker_ids),
                    # ensure a reset of the USRP on the enB node
                    Run(f"rhubarbe usrpon {node_enb}"),
                ],
            ),
            SshJob(
                scheduler=scheduler,
                required=check_lease,
                node=faraday,
                critical=False,
                verbose=verbose,
                label="turning off unused nodes",
                command=[
                    Run("rhubarbe bye --all "
                        + "".join(f"~{x} " for x in nodes))
                ]
            )
        ]

    if quectel:
        # NOTE(review): the trailing comma after SshJob(...) below makes
        # prepare_quectel a 1-tuple containing the job, not the job
        # itself; 'required=prepare_quectel' further down then depends
        # on a tuple - verify this is intended or drop the comma
        prepare_quectel = SshJob(
            scheduler=scheduler,
            required=green_light,
            node=faraday,
            critical=True,
            verbose=verbose,
            label=f"Load image {quectel_image} on quectel UE nodes",
            commands=[
                Run("rhubarbe", "usrpoff", *quectel_ids),
                Run("rhubarbe", "load", *quectel_ids, "-i", quectel_image),
                Run("rhubarbe", "wait", *quectel_ids),
                Run("rhubarbe", "usrpon", *quectel_ids),
            ],
        ),

    ##########
    if quectel:
        # wait 30s for Quectel modules show up
        wait_quectel_ready = PrintJob(
            "Let Quectel modules show up",
            scheduler=scheduler,
            required=prepare_quectel,
            sleep=30,
            label="sleep 30s for the Quectel modules to show up"
        )
        # run the Quectel Connection Manager as a service on each
        # Quectel UE node
        quectelCM_service = Service(
            command="quectel-CM -s oai.ipv4 -4",
            service_id="QuectelCM",
            verbose=verbose,
        )
        init_quectel_nodes = [
            SshJob(
                scheduler=scheduler,
                required=wait_quectel_ready,
                node=node,
                critical=True,
                verbose=verbose,
                label=f"Init Quectel UE on fit node {id}",
                commands=[
                    RunScript(find_local_embedded_script("nodes.sh"),
                              "check-quectel-on", includes=INCLUDES),
                    quectelCM_service.start_command(),
                ],
            ) for id, node in nodes_quectel_index.items()
        ]
        # wait 20s for Quectel Connection Manager to start up
        wait_quectelCM_ready = PrintJob(
            "Let QuectelCM start up",
            scheduler=scheduler,
            required=init_quectel_nodes,
            sleep=20,
            label="Sleep 20s for the Quectel Connection Manager(s) to start up"
        )
        detach_quectel_nodes = [
            SshJob(
                scheduler=scheduler,
                required=wait_quectelCM_ready,
                node=node,
                critical=True,
                verbose=verbose,
                label=f"Detach Quectel UE on fit node {id}",
                command=RunScript(find_local_embedded_script("nodes.sh"),
                                  "quectel-detach", includes=INCLUDES),
            ) for id, node in nodes_quectel_index.items()
        ]

    ##########
    # Initialize k8s on the master node
    init_master = SshJob(
        scheduler=scheduler,
        required=green_light,
        node=master,
        critical=True,
        verbose=verbose,
        label=f"Install and launch k8s on the master {node_master}",
        commands=[
            Run("swapoff -a"),
            Run("hostnamectl set-hostname master-node"),
            # keep the join command around for the workers
            Run("kubeadm init --pod-network-cidr=10.244.0.0/16 > /tmp/join_msg.txt"),
            Run("tail -2 /tmp/join_msg.txt > /tmp/join_msg"),
            Run("mkdir -p $HOME/.kube"),
            Run("cp -i /etc/kubernetes/admin.conf $HOME/.kube/config"),
            Run("chown $(id -u):$(id -g) $HOME/.kube/config"),
            Run("kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml"),
            Run("kubectl get pods --all-namespaces"),
        ],
    )
    # each worker fetches the join command from the master and runs it
    init_workers = [
        SshJob(
            scheduler=scheduler,
            required=init_master,
            node=node,
            critical=True,
            verbose=verbose,
            label=f"Init k8s on fit node {id} and join the cluster",
            commands=[
                Run("swapoff -a"),
                Run("increase-control-mtu"),
                Run(f"scp -o 'StrictHostKeyChecking no' {fit_master}:/tmp/join_msg /tmp/join_msg"),
                Run("chmod a+x /tmp/join_msg"),
                Run("/tmp/join_msg"),
            ],
        ) for id, node in worker_index.items()
    ]

    # wait 10s for K8S nodes setup
    wait_k8nodes_ready = PrintJob(
        "Let k8s set up",
        scheduler=scheduler,
        required=init_workers,
        sleep=10,
        label="sleep 10s for the k8s nodes to settle"
    )

    init_kube5g = SshJob(
        scheduler=scheduler,
        required=wait_k8nodes_ready,
        node=master,
        verbose=verbose,
        label=f"Add oai:ran label to oai-ran pod on {node_enb} and start 5GOperator pod",
        commands=[
            Run("kubectl get nodes"),
            # add label to the eNB node to help k8s scheduler selects
            # the right fit node
            Run(f"kubectl label nodes fit{node_enb} oai=ran"),
            Run("kubectl get nodes -Loai"),
            ## retrieve the kube5g operator
            #Run("git clone -b develop [email protected]:mosaic5g/kube5g.git"),
            # install a few dependencies
            Run("apt install -y python3-pip"),
            Run("pip3 install --upgrade pip"),
            Run("pip3 install ruamel.yaml==0.16.12 colorlog==4.6.2"),
            Run("sed -i 's/oairan:v1-1.0-1/oairan:v1-1.0-3/g' /root/kube5g/common/config-manager/conf_global_default.yaml"),
            # specify the R2lab specific configuration
            Run("cd /root/kube5g/common/config-manager; ./conf-manager.py -s conf_short_r2lab.yaml"),
            # apply the R2lab CRD
            Run("cd /root/kube5g/openshift/kube5g-operator; ./k5goperator.sh -n"),
            # start the kube5g operator pod
            Run("cd /root/kube5g/openshift/kube5g-operator; ./k5goperator.sh container start"),
            Run("kubectl get pods"),
        ],
    )

    # wait 30s for K8S 5G Operator setup
    wait_k8_5GOp_ready = PrintJob(
        "Let 5G Operator set up",
        scheduler=scheduler,
        required=init_kube5g,
        sleep=30,
        label="wait 30s for the 5G Operator pod to settle"
    )

    if only_kube5g:
        finish = SshJob(
            scheduler=scheduler,
            required=wait_k8_5GOp_ready,
            node=master,
            verbose=verbose,
            label=f"showing nodes and pods before leaving",
            commands=[
                Run("kubectl get nodes -Loai"),
                Run("kubectl get pods"),
            ],
        )
    else:
        # pick deployment flavor and an empirical settle delay
        if disaggregated_cn:
            cn_type = "disaggregated-cn"
            # setup_time = 120
            setup_time = 200
        else:
            cn_type = "all-in-one"
            # setup_time = 60
            setup_time = 140
        if flexran:
            flexran_opt = "flexran"
        else:
            flexran_opt = ""

        run_kube5g = SshJob(
            scheduler=scheduler,
            required=wait_k8_5GOp_ready,
            node=master,
            verbose=verbose,
            label=f"deploy {operator_version} {cn_type} {flexran_opt} pods",
            commands=[
                Run("kubectl get nodes -Loai"),
                Run(f"cd /root/kube5g/openshift/kube5g-operator; ./k5goperator.sh deploy {operator_version} {cn_type} {flexran_opt}"),
                Run("kubectl get pods"),
            ],
        )

        # Coffee Break -- wait 1 or 2mn for K8S 5G pods setup
        wait_k8_5Gpods_ready = PrintJob(
            "Let all 5G pods set up",
            scheduler=scheduler,
            required=run_kube5g,
            sleep=setup_time,
            label=f"waiting {setup_time}s for all 5G pods to settle"
        )

        check_kube5g = SshJob(
            scheduler=scheduler,
            required=wait_k8_5Gpods_ready,
            node=master,
            verbose=verbose,
            label="Check which pods are deployed",
            commands=[
                Run("kubectl get nodes -Loai"),
                Run("kubectl get pods"),
            ],
        )

        if drone:
            # the place where runtime variables get stored
            env = Variables()
            #
            # Define and run all the services to launch the Drone app
            # locally on a firefox browser
            #
            drone_service = Service(
                command=f"python /root/mosaic5g/store/sdk/frontend/drone/drone.py --port=8088 --tasks --address=192.168.3.{node_enb}",
                service_id="drone_app",
                verbose=verbose,
            )
            # the pod name is only known at run time, hence the Deferred
            k8s_port9999_fwd_service = Service(
                command=Deferred("kubectl port-forward {{flexran_pod}} 9999:9999 --address 0.0.0.0", env),
                service_id="k8s-port9999-fwd",
                verbose=verbose,
                # somehow this is required for kubectl to run properly
                environ={'KUBECONFIG': '/etc/kubernetes/admin.conf'}
            )
            # can't use a Service instance on the local box if it's not
            # a Linux and we have macs...
            local_port_fwd = (f"ssh -f -N -4"
                              f" -L9999:192.168.3.{node_master}:9999"
                              f" -L8088:192.168.3.{node_enb}:8088"
                              f" -o ExitOnForwardFailure=yes"
                              f" {slicename}@faraday.inria.fr")
            browser_service = Service(
                command=f"sleep 10; {cmd_open} http://127.0.0.1:8088/",
                service_id="drone_browser",
                verbose=verbose,
            )

            run_drone = SshJob(
                scheduler=scheduler,
                required=check_kube5g,
                node=worker_index[node_enb],
                verbose=verbose,
                label=f"Run the drone app on worker node {node_enb} as a service",
                commands=[
                    drone_service.start_command(),
                ],
            )
            get_flexran_podname = SshJob(
                scheduler=scheduler,
                required=check_kube5g,
                node=master,
                verbose=verbose,
                label=f"Retrieve the name of the FlexRAN pod",
                commands=[
                    # xxx here
                    Run("kubectl get --no-headers=true pods -l app=flexran -o custom-columns=:metadata.name",
                        capture=Capture('flexran_pod', env)),
                ],
            )
            run_k8s_port9999_fwd = SshJob(
                scheduler=scheduler,
                required=get_flexran_podname,
                node=master,
                verbose=verbose,
                label=f"Run port forwarding on the master node as a service",
                commands=[
                    k8s_port9999_fwd_service.start_command(),
                ],
            )
            # On the local machine, impossible to use Services as the
            # latter uses systemd-run, only available on Linux
            run_local_ports_fwd = SshJob(
                scheduler=scheduler,
                required=check_kube5g,
                node=LocalNode(),
                verbose=verbose,
                label=f"Forward local ports 8088 and 9999",
                command=Run(local_port_fwd + "&", ignore_outputs=True),
            )
            if run_browser:
                run_local_browser = SshJob(
                    scheduler=scheduler,
                    required=(run_drone, run_k8s_port9999_fwd,
                              run_local_ports_fwd),
                    node=LocalNode(),
                    verbose=verbose,
                    label=f"Run the browser on the local node in background",
                    command=browser_service.command + "&",
                )
                phones_requirements = run_local_browser
            else:
                phones_requirements = run_k8s_port9999_fwd
        else:
            phones_requirements = check_kube5g

    ##########
    # Test phone(s) connectivity
    # build per-phone wait commands; zip() caps everything at the
    # number of phones actually selected
    sleeps_ran = (20, 25)
    phone_msgs = [f"wait for {sleep}s for eNB to start up before waking up phone{id}"
                  for sleep, id in zip(sleeps_ran, phones)]
    wait_commands = [f"echo {msg}; sleep {sleep}"
                     for msg, sleep in zip(phone_msgs, sleeps_ran)]
    sleeps_phone = (15, 20)
    phone2_msgs = [f"wait for {sleep}s for phone{id} before starting tests"
                   for sleep, id in zip(sleeps_phone, phones)]
    wait2_commands = [f"echo {msg}; sleep {sleep}"
                      for msg, sleep in zip(phone2_msgs, sleeps_phone)]

    job_start_phones = [
        SshJob(
            node=faraday,
            commands=[
                Run(wait_command),
                RunScript(find_local_embedded_script("faraday.sh"),
                          f"macphone{id}",
                          "r2lab-embedded/shell/macphone.sh",
                          "phone-on", includes=INCLUDES),
                Run(wait2_command),
                RunScript(find_local_embedded_script("faraday.sh"),
                          f"macphone{id}",
                          "r2lab-embedded/shell/macphone.sh",
                          "phone-check-cx", includes=INCLUDES),
                RunScript(find_local_embedded_script("faraday.sh"),
                          f"macphone{id}",
                          "r2lab-embedded/shell/macphone.sh",
                          "phone-start-app", includes=INCLUDES),
            ],
            label=f"turn off airplane mode on phone {id}",
            required=phones_requirements,
            scheduler=scheduler)
        for id, wait_command, wait2_command in zip(phones, wait_commands,
                                                   wait2_commands)
    ]

    if quectel:
        # wait 60s for Quectel connection(s) to set up
        # NOTE(review): check_kube5g is only bound in the
        # 'not only_kube5g' branch above - with operator_version="none"
        # and quectel nodes selected this line would raise NameError;
        # verify the callers never combine these options
        wait_before_attach_quectel = PrintJob(
            "Wait again 30s before attaching Quectel device(s)",
            scheduler=scheduler,
            required=(job_start_phones, check_kube5g,
                      detach_quectel_nodes),
            sleep=30,
            label="Sleep 30s before attaching Quectel device(s)"
        )
        job_attach_quectel = [
            SshJob(
                scheduler=scheduler,
                required=wait_before_attach_quectel,
                node=node,
                critical=True,
                verbose=verbose,
                label=f"Attach Quectel UE on fit node {id}",
                command=RunScript(find_local_embedded_script("nodes.sh"),
                                  "quectel-attach", includes=INCLUDES),
            ) for id, node in nodes_quectel_index.items()
        ]
        # wait 30s for Quectel connection(s) to set up
        wait_quectel_cx_ready = PrintJob(
            "Let the Quectel connection(s) set up",
            scheduler=scheduler,
            required=job_attach_quectel,
            sleep=30,
            label="Sleep 30s for the Quectel connection(s) to set up"
        )
        test_quectel_cx = [
            SshJob(
                scheduler=scheduler,
                required=wait_quectel_cx_ready,
                node=node,
                critical=False,
                verbose=verbose,
                label=f"Check the Quectel cx on fit node {id}",
                command=RunScript(find_local_embedded_script("nodes.sh"),
                                  "check-quectel-cx", includes=INCLUDES),
            ) for id, node in nodes_quectel_index.items()
        ]

    ##########
    # Update the .dot and .png file for illustration purposes
    scheduler.check_cycles()
    name = "deploy-kube5g"
    print(10*'*', 'See main scheduler in',
          scheduler.export_as_pngfile(name))

    # orchestration scheduler jobs
    if verbose:
        scheduler.list()

    if dry_run:
        return True

    if not scheduler.orchestrate():
        print(f"RUN KO : {scheduler.why()}")
        scheduler.debrief()
        return False
    print(f"RUN OK, you can log now on master node {fit_master} to manually change the scenario")
    print(80*'*')
def one_run(*, protocol, interference,
            run_name=default_run_name, slicename=default_slicename,
            tx_power, phy_rate, antenna_mask, channel,
            load_images=False,
            node_ids=DEFAULT_NODE_IDS,
            src_ids=DEFAULT_SRC_IDS, dest_ids=DEFAULT_DEST_IDS,
            scrambler_id=DEFAULT_SCRAMBLER_ID,
            tshark=False, map=False, warmup=False,
            route_sampling=False, iperf=False,
            verbose_ssh=False, verbose_jobs=False, dry_run=False,
            run_number=None):
    """
    Performs data acquisition on all nodes with the following settings

    Arguments:
        tx_power: in dBm, a string like 5, 10 or 14.
            Corresponds to the transmission power.
        phy_rate: a string among 1, 54. Correspond to the wifi rate.
        antenna_mask: a string among 1, 3, 7.
        channel: a string like e.g. 1 or 40. Correspond to the channel.
        protocol: a string among batman, olsr. Correspond to the protocol
        interference: in amplitude percentage, a string like 15 or 20.
            Correspond to the power of the noise generated in the spectrum.
            Can be either None or "None" to mean no interference.
        run_name: the name for a subdirectory where all data will be kept
            successive runs should use the same name for further visualization
        slicename: the Unix login name (slice name) to enter the gateway
        load_images: a boolean specifying whether nodes should be re-imaged
            first
        node_ids: a list of node ids to run the scenario against;
            strings or ints are OK
        src_ids: a list of nodes from which we will launch the ping from;
            strings or ints are OK
        dest_ids: a list of nodes that the pings target;
            strings or ints are OK
        scrambler_id: the node that runs uhd_siggen when interference is on
        tshark: a boolean specifying wether we should format/parse the .pcap
        map: a boolean specifying wether we should fetch/parse
            the route tables of the nodes
        warmup: a boolean specifying whether we should run a ping before
            the experiment to be certain of the stabilisation on the network
        route_sampling: a boolean; when set, a route-sampling service is run
            on each node and its output retrieved at the end
        iperf: a boolean; when set, iperf server/client jobs are inserted
        dry_run: when set, only the experiment graph / summary is produced

    Returns:
        True on a fully successful run (or in dry-run mode), False otherwise.
    """
    # set default for the nodes parameter
    node_ids = ([int(id) for id in node_ids]
                if node_ids is not None else DEFAULT_NODE_IDS)
    src_ids = ([int(id) for id in src_ids]
               if src_ids is not None else DEFAULT_SRC_IDS)
    # FIX: the fallback used to be DEFAULT_NODE_IDS, inconsistent with
    # both the parameter default and the src_ids fallback just above
    dest_ids = ([int(id) for id in dest_ids]
                if dest_ids is not None else DEFAULT_DEST_IDS)

    # all nodes - i.e. including sources and destinations -
    # need to run the protocol
    node_ids = list(set(node_ids).union(set(src_ids).union(set(dest_ids))))

    if interference == "None":
        interference = None

    # open result dir no matter what
    run_root = naming_scheme(
        run_name=run_name, protocol=protocol,
        interference=interference, autocreate=True)

    # timestamp the trace file with the time the run started
    ref_time = apssh_time()
    trace = run_root / f"trace-{ref_time}"

    try:
        with trace.open('w') as feed:
            def log_line(line):
                # one helper so every line goes through time_line()
                time_line(line, file=feed)
            load_msg = f"{'WITH' if load_images else 'NO'} image loading"
            interference_msg = (f"interference={interference} "
                                f"from scrambler={scrambler_id}")
            nodes = " ".join(str(n) for n in node_ids)
            srcs = " ".join(str(n) for n in src_ids)
            dests = " ".join(str(n) for n in dest_ids)
            ping_labels = [
                f"PING {s} ➡︎ {d}"
                for s in src_ids
                # and on the destination
                for d in dest_ids
                if d != s
            ]

            log_line(f"output in {run_root}")
            log_line(f"trace in {trace}")
            log_line(f"protocol={protocol}")
            log_line(f"{load_msg}")
            log_line(f"{interference_msg}")
            log_line("----")
            log_line(f"Selected nodes : {nodes}")
            log_line(f"Sources : {srcs}")
            log_line(f"Destinations : {dests}")
            for label in ping_labels:
                log_line(f"{label}")
            log_line("----")
            for feature in ('warmup', 'tshark', 'map',
                            'route_sampling', 'iperf'):
                # locals() still holds the keyword parameters at this point
                log_line(f"Feature {feature}: {locals()[feature]}")

    except Exception as exc:
        print(f"Cannot write into {trace} - aborting this run")
        print(f"Found exception {type(exc)} - {exc}")
        return False

    #
    # dry-run mode
    # just display a one-liner with parameters
    #
    prelude = "" if not dry_run else "dry_run:"
    with trace.open() as feed:
        print(f"**************** {ref_time} one_run #{run_number}:")
        for line in feed:
            print(prelude, line, sep='', end='')
    if dry_run:
        return True

    # the nodes involved
    faraday = SshNode(hostname=default_gateway, username=slicename,
                      formatter=TimeColonFormatter(), verbose=verbose_ssh)
    # this is a python dictionary that allows to retrieve a node object
    # from an id
    node_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id),
                    username="******",
                    formatter=TimeColonFormatter(), verbose=verbose_ssh)
        for id in node_ids
    }
    # extracts for sources and destinations
    src_index = {id: node for (id, node) in node_index.items()
                 if id in src_ids}
    dest_index = {id: node for (id, node) in node_index.items()
                  if id in dest_ids}

    if interference:
        node_scrambler = SshNode(
            gateway=faraday, hostname=fitname(scrambler_id),
            username="******",
            formatter=TimeColonFormatter(), verbose=verbose_ssh)
    # the global scheduler
    scheduler = Scheduler(verbose=verbose_jobs)

    ##########
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        verbose=verbose_jobs,
        label="rhubarbe check lease",
        command=Run("rhubarbe leases --check", label="rlease"),
    )

    # load images if requested

    green_light = check_lease

    # at some point we did not load the scrambler if interference was None
    # and that was a way to run faster loads with no interference
    # but now we always load the scrambler node with gnuradio
    # this is because when we do runs.py -i None 15 30 ...
    # then the first call to one_run is with interference being None
    # but it is still important to load the scrambler

    if load_images:
        # copy node_ids
        load_ids = node_ids[:]
        load_ids.append(scrambler_id)
        # the nodes that we **do not** use should be turned off
        # so if we have selected e.g. nodes 10 12 and 15, we will do
        # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15
        negated_node_ids = [f"~{id}" for id in load_ids]
        # we can do these three things in parallel
        ready_jobs = [
            SshJob(node=faraday, required=green_light,
                   scheduler=scheduler, verbose=verbose_jobs,
                   command=Run("rhubarbe", "off", "-a", *negated_node_ids,
                               label="turn off unused nodes")),
            SshJob(node=faraday, required=green_light,
                   scheduler=scheduler, verbose=verbose_jobs,
                   label="load batman image",
                   command=Run("rhubarbe", "load", "-i", "batman-olsr",
                               *node_ids,
                               label=f"load ubuntu on {node_ids}")),
            SshJob(
                node=faraday, required=green_light,
                scheduler=scheduler, verbose=verbose_jobs,
                label="load gnuradio image",
                command=Run("rhubarbe", "load", "-i", "batman-olsr-gnuradio",
                            scrambler_id,
                            label=f"load gnuradio on {scrambler_id}")),
        ]
        # replace green_light in this case
        green_light = SshJob(
            node=faraday, required=ready_jobs,
            scheduler=scheduler, verbose=verbose_jobs,
            label="wait for nodes to come up",
            command=Run("rhubarbe", "wait", *load_ids))

    ##########
    # setting up the wireless interface on all nodes
    #
    # provide node-utilities with the ranges/units it expects
    frequency = channel_frequency[int(channel)]
    # tx_power_in_mBm not in dBm
    # NOTE(review): if tx_power is really a string as the docstring says,
    # '* 100' repeats the string - presumably an int is passed; verify callers
    tx_power_driver = tx_power * 100

    # just in case somme services failed in the previous experiment
    reset_failed_services_job = [
        SshJob(
            node=node,
            verbose=verbose_jobs,
            label="reset failed services",
            command=Run("systemctl reset-failed",
                        label="reset-failed services"))
        for id, node in node_index.items()
    ]
    reset_failed_services = Scheduler(
        *reset_failed_services_job,
        scheduler=scheduler,
        required=green_light,
        verbose=verbose_jobs,
        label="Reset failed services")
    init_wireless_sshjobs = [
        SshJob(
            node=node,
            verbose=verbose_jobs,
            label=f"init {id}",
            command=RunScript(
                "node-utilities.sh",
                f"init-ad-hoc-network-{WIRELESS_DRIVER}",
                WIRELESS_DRIVER, "foobar", frequency, phy_rate,
                antenna_mask, tx_power_driver,
                label="init add-hoc network"),
        )
        for id, node in node_index.items()]
    init_wireless_jobs = Scheduler(
        *init_wireless_sshjobs,
        scheduler=scheduler,
        required=green_light,
        verbose=verbose_jobs,
        label="Initialisation of wireless chips")

    if interference:
        # Run uhd_siggen with the chosen power
        init_scrambler_job = SshJob(
            scheduler=scheduler,
            required=green_light,
            forever=True,
            node=node_scrambler,
            verbose=verbose_jobs,
            # TODO : If exit-signal patch is done add exit-signal=["TERM"]
            # to this run object and call uhd_siggen directly
            commands=[RunScript("node-utilities.sh", "init-scrambler",
                                label="init scrambler"),
                      Run("systemd-run --unit=uhd_siggen -t ",
                          f"uhd_siggen -a usrp -f {frequency}M",
                          f"--sine --amplitude 0.{interference}",
                          label="systemctl start uhd_siggen")
                      ]
        )

    green_light = [init_wireless_jobs, reset_failed_services]
    # then install and run batman on fit nodes
    run_protocol_job = [
        SshJob(
            # scheduler=scheduler,
            node=node,
            label=f"init and run {protocol} on fit node {id}",
            verbose=verbose_jobs,
            # CAREFUL : These ones use sytemd-run
            # with the ----service-type=forking option!
            command=RunScript("node-utilities.sh",
                              f"run-{protocol}",
                              label=f"run {protocol}"),
        )
        for id, node in node_index.items()]
    run_protocol = Scheduler(
        *run_protocol_job,
        scheduler=scheduler,
        required=green_light,
        verbose=verbose_jobs,
        label="init and run routing protocols")

    green_light = run_protocol

    # after that, run tcpdump on fit nodes, this job never ends...
    if tshark:
        run_tcpdump_job = [
            SshJob(
                # scheduler=scheduler_monitoring,
                node=node,
                forever=True,
                label=f"run tcpdump on fit node {id}",
                verbose=verbose_jobs,
                command=[
                    Run("systemd-run -t --unit=tcpdump",
                        f"tcpdump -U -i moni-{WIRELESS_DRIVER}",
                        f"-y ieee802_11_radio -w /tmp/fit{id}.pcap",
                        label=f"tcpdump {id}")
                ]
            )
            for id, node in node_index.items()
        ]
        run_tcpdump = Scheduler(
            *run_tcpdump_job,
            scheduler=scheduler,
            required=green_light,
            forever=True,
            verbose=verbose_jobs,
            label="Monitoring - tcpdumps")

    # let the wireless network settle
    settle_scheduler = Scheduler(
        scheduler=scheduler,
        required=green_light,
    )

    if warmup:
        # warmup pings don't need to be sequential, so let's
        # do all the nodes at the same time
        # on a given node though, we'll ping the other ends sequentially
        # see the graph for more
        warmup_jobs = [
            SshJob(
                node=node_s,
                verbose=verbose_jobs,
                commands=[
                    RunScript("node-utilities.sh",
                              "my-ping", f"10.0.0.{d}",
                              warmup_ping_timeout,
                              warmup_ping_interval,
                              warmup_ping_size,
                              warmup_ping_messages,
                              f"warmup {s} ➡︎ {d}",
                              label=f"warmup {s} ➡︎ {d}")
                    for d in dest_index.keys()
                    if s != d
                ]
            )
            # for each selected experiment nodes
            for s, node_s in src_index.items()
        ]
        warmup_scheduler = Scheduler(
            *warmup_jobs,
            scheduler=settle_scheduler,
            verbose=verbose_jobs,
            label="Warmup pings")
        settle_wireless_job2 = PrintJob(
            "Let the wireless network settle after warmup",
            sleep=settle_delay_shorter,
            scheduler=settle_scheduler,
            required=warmup_scheduler,
            label=f"settling-warmup for {settle_delay_shorter} sec")

    # this is a little cheating; could have gone before the bloc above
    # but produces a nicer graphical output
    # we might want to help asynciojobs if it offered a means
    # to specify entry and exit jobs in a scheduler
    settle_wireless_job = PrintJob(
        "Let the wireless network settle",
        sleep=settle_delay_long,
        scheduler=settle_scheduler,
        label=f"settling for {settle_delay_long} sec")

    green_light = settle_scheduler

    if iperf:
        iperf_service_jobs = [
            SshJob(
                node=node_d,
                verbose=verbose_jobs,
                forever=True,
                commands=[
                    Run("systemd-run -t --unit=iperf",
                        "iperf -s -p 1234 -u",
                        label=f"iperf serv on {d}"),
                ],
            )
            for d, node_d in dest_index.items()
        ]
        iperf_serv_sched = Scheduler(
            *iperf_service_jobs,
            verbose=verbose_jobs,
            label="Iperf Servers",
            # for a nicer graphical output
            # otherwise the exit arrow
            # from scheduler 'iperf mode'
            # to job 'settling for 60s'
            # gets to start from this box
            forever=True,
        )
        iperf_cli = [
            SshJob(
                node=node_s,
                verbose=verbose_jobs,
                commands=[
                    Run("sleep 7", label=""),
                    Run("iperf",
                        f"-c 10.0.0.{d} -p 1234",
                        f"-u -b {phy_rate}M -t 60",
                        f"-l 1024 > IPERF-{s:02d}-{d:02d}",
                        label=f"run iperf {s} ➡︎ {d}")
                ]
            )
            for s, node_s in src_index.items()
            for d, node_d in dest_index.items()
            if s != d
        ]
        iperf_cli_sched = Scheduler(
            Sequence(*iperf_cli),
            verbose=verbose_jobs,
            label="Iperf Clients")
        iperf_stop = [
            SshJob(node=node_d,
                   verbose=verbose_jobs,
                   label=f"Stop iperf on {d}",
                   command=Run("systemctl stop iperf"))
            for d, node_d in dest_index.items()
        ]
        iperf_stop_sched = Scheduler(
            *iperf_stop,
            required=iperf_cli_sched,
            verbose=verbose_jobs,
            label="Iperf server stop")
        iperf_fetch = [
            SshJob(node=node_s,
                   verbose=verbose_jobs,
                   command=Pull(
                       remotepaths=[f"IPERF-{s:02d}-{d:02d}"],
                       localpath=str(run_root),
                       # FIX: label used to lack the f prefix, so the
                       # {s} / {d} placeholders were shown literally
                       label=f"fetch iperf {s} ➡︎ {d}")
                   )
            for s, node_s in src_index.items()
            for d, node_d in dest_index.items()
            if s != d
        ]
        iperf_fetch_sched = Scheduler(
            *iperf_fetch,
            required=iperf_stop_sched,
            verbose=verbose_jobs,
            label="Iperf fetch report")
        iperf_jobs = [iperf_serv_sched, iperf_cli_sched,
                      iperf_stop_sched, iperf_fetch_sched]
        iperf_sched = Scheduler(
            *iperf_jobs,
            scheduler=scheduler,
            required=green_light,
            verbose=verbose_jobs,
            label="Iperf Module")
        settle_wireless_job_iperf = PrintJob(
            "Let the wireless network settle",
            sleep=settle_delay_shorter,
            scheduler=scheduler,
            required=iperf_sched,
            label=f"settling-iperf for {settle_delay_shorter} sec")

        green_light = settle_wireless_job_iperf

    # create all the tracepath jobs from the first node in the list
    if map:
        map_jobs = [
            SshJob(
                node=node,
                label=f"Generating ROUTE file for proto {protocol} on node {id}",
                verbose=verbose_jobs,
                commands=[
                    RunScript("node-utilities.sh",
                              f"route-{protocol}",
                              f"> ROUTE-TABLE-{id:02d}",
                              label="get route table"),
                    Pull(remotepaths=[f"ROUTE-TABLE-{id:02d}"],
                         localpath=str(run_root),
                         label="")
                ],
            )
            for id, node in node_index.items()
        ]
        map_scheduler = Scheduler(
            *map_jobs,
            scheduler=scheduler,
            required=green_light,
            verbose=verbose_jobs,
            label="Snapshoting route files")
        green_light = map_scheduler

    if route_sampling:
        route_sampling_jobs = [
            SshJob(
                node=node,
                label=f"Route sampling service for proto {protocol} on node {id}",
                verbose=False,
                forever=True,
                commands=[
                    Push(localpaths=["route-sample-service.sh"],
                         remotepath=".", label=""),
                    Run("chmod +x route-sample-service.sh", label=""),
                    Run("systemd-run -t --unit=route-sample",
                        "/root/route-sample-service.sh",
                        "route-sample",
                        f"ROUTE-TABLE-{id:02d}-SAMPLED",
                        protocol,
                        label="start route-sampling"),
                ],
            )
            for id, node in node_index.items()
        ]
        route_sampling_scheduler = Scheduler(
            *route_sampling_jobs,
            scheduler=scheduler,
            verbose=False,
            forever=True,
            label="Route Sampling services launch",
            required=green_light)

    ##########
    # create all the ping jobs, i.e. max*(max-1)/2
    # this again is a python list comprehension
    # see the 2 for instructions at the bottom
    #
    # notice that these SshJob instances are not yet added
    # to the scheduler, we will add them later on
    # depending on the sequential/parallel strategy
    pings_job = [
        SshJob(
            node=node_s,
            verbose=verbose_jobs,
            commands=[
                Run(f"echo actual ping {s} ➡︎ {d} using {protocol}",
                    label=f"ping {s} ➡︎ {d}"),
                RunScript("node-utilities.sh", "my-ping",
                          f"10.0.0.{d}",
                          ping_timeout, ping_interval,
                          ping_size, ping_messages,
                          f"actual {s} ➡︎ {d}",
                          ">", f"PING-{s:02d}-{d:02d}",
                          label=""),
                Pull(remotepaths=[f"PING-{s:02d}-{d:02d}"],
                     localpath=str(run_root),
                     label=""),
            ],
        )
        # for each selected experiment nodes
        for s, node_s in src_index.items()
        for d, node_d in dest_index.items()
        if s != d
    ]
    pings = Scheduler(
        scheduler=scheduler,
        label="PINGS",
        verbose=verbose_jobs,
        required=green_light)

    # retrieve all pcap files from fit nodes
    stop_protocol_job = [
        SshJob(
            # scheduler=scheduler,
            node=node,
            # required=pings,
            label=f"kill routing protocol on {id}",
            verbose=verbose_jobs,
            command=RunScript("node-utilities.sh",
                              f"kill-{protocol}",
                              label=f"kill-{protocol}"),
        )
        for id, node in node_index.items()
    ]
    stop_protocol = Scheduler(
        *stop_protocol_job,
        scheduler=scheduler,
        required=pings,
        label="Stop routing protocols",
    )

    if tshark:
        retrieve_tcpdump_job = [
            SshJob(
                # scheduler=scheduler,
                node=nodei,
                # required=pings,
                label=f"retrieve pcap trace from fit{i:02d}",
                verbose=verbose_jobs,
                commands=[
                    Run("systemctl stop tcpdump",
                        label="stop tcpdump"),
                    Run(
                        f"echo retrieving pcap trace and result-{i}.txt from fit{i:02d}",
                        label=""),
                    Pull(remotepaths=[f"/tmp/fit{i}.pcap"],
                         localpath=str(run_root), label=""),
                ],
            )
            for i, nodei in node_index.items()
        ]
        retrieve_tcpdump = Scheduler(
            *retrieve_tcpdump_job,
            scheduler=scheduler,
            required=pings,
            label="Retrieve tcpdump",
        )

    if route_sampling:
        retrieve_sampling_job = [
            SshJob(
                # scheduler=scheduler,
                node=nodei,
                # required=pings,
                label=f"retrieve sampling trace from fit{i:02d}",
                verbose=verbose_jobs,
                commands=[
                    Run("systemctl stop route-sample",
                        label="stop route-sample"),
                    Run(
                        f"echo retrieving sampling trace from fit{i:02d}",
                        label=""),
                    Pull(remotepaths=[f"ROUTE-TABLE-{i:02d}-SAMPLED"],
                         localpath=str(run_root), label=""),
                ],
            )
            for i, nodei in node_index.items()
        ]
        retrieve_sampling = Scheduler(
            *retrieve_sampling_job,
            scheduler=scheduler,
            required=pings,
            verbose=verbose_jobs,
            label="Stop & retrieve route sampling",
        )

    if tshark:
        parse_pcaps_job = [
            SshJob(
                # scheduler=scheduler,
                node=LocalNode(),
                # required=retrieve_tcpdump,
                label=f"parse pcap trace {run_root}/fit{i}.pcap",
                verbose=verbose_jobs,
                command=Run("tshark", "-2", "-r",
                            f"{run_root}/fit{i}.pcap",
                            "-R",
                            f"'(ip.dst==10.0.0.{i} && icmp) && radiotap.dbm_antsignal'",
                            "-Tfields",
                            "-e", "'ip.src'",
                            # FIX: a missing comma used to glue these two
                            # strings into the single argument -e'ip.dst'
                            "-e", "'ip.dst'",
                            "-e", "'radiotap.dbm_antsignal'",
                            ">", f"{run_root}/result-{i}.txt",
                            label=f"parsing pcap from {i}"),
            )
            for i in node_ids
        ]
        parse_pcaps = Scheduler(
            *parse_pcaps_job,
            scheduler=scheduler,
            required=retrieve_tcpdump,
            label="Parse pcap",
        )

    if interference:
        kill_uhd_siggen = SshJob(
            scheduler=scheduler,
            node=node_scrambler,
            required=pings,
            label=f"killing uhd_siggen on the scrambler node {scrambler_id}",
            verbose=verbose_jobs,
            commands=[Run("systemctl", "stop", "uhd_siggen"),
                      ],
        )
        kill_2_uhd_siggen = SshJob(
            scheduler=scheduler,
            node=faraday,
            required=kill_uhd_siggen,
            label=f"turning off usrp on the scrambler node {scrambler_id}",
            verbose=verbose_jobs,
            command=Run("rhubarbe", "usrpoff", scrambler_id),
        )

    pings.add(Sequence(*pings_job))
    # for running sequentially we impose no limit on the scheduler
    # that will be limitied anyways by the very structure
    # of the required graph

    # safety check
    scheduler.export_as_pngfile(run_root / "experiment-graph")
    # NOTE(review): unreachable - dry_run already returned True further up
    if dry_run:
        scheduler.list()
        return True

    # if not in dry-run mode, let's proceed to the actual experiment
    ok = scheduler.run()  # jobs_window=jobs_window)

    # close all ssh connections
    close_ssh_in_scheduler(scheduler)

    # give details if it failed
    if not ok:
        scheduler.debrief()
        scheduler.export_as_pngfile("debug")

    if ok and map:
        time_line("Creation of MAP files")
        post_processor = ProcessRoutes(run_root, src_ids, node_ids)
        post_processor.run()
    if ok and route_sampling:
        time_line("Creation of ROUTE SAMPLING files")
        post_processor = ProcessRoutes(run_root, src_ids, node_ids)
        post_processor.run_sampled()
    # data acquisition is done, let's aggregate results
    # i.e. compute averages
    # if ok and tshark:
    #     post_processor = Aggregator(run_root, node_ids, antenna_mask)
    #     post_processor.run()

    time_line("one_run done")
    return ok
def test_format(self):
    """
    Run two echo commands on localhost through a node whose
    TerminalFormatter uses a custom template.
    """
    scheduler = Scheduler()
    # template mixes strftime directives with @host@ / @line@ placeholders
    formatter = TerminalFormatter("%Y:%H:%S - @host@:@line@",
                                  verbose=True)
    node = SshNode(localhostname(), username=localuser(),
                   formatter=formatter)
    job = SshJob(node=node,
                 commands=[Run("echo LINE1"), Run("echo LINE2")])
    scheduler.add(job)
    scheduler.run()
########## # create an orchestration scheduler scheduler = Scheduler() # the job to wait before proceeding ready_requirement = None # has the user requested to load images ? # if so, we just need to wait for 2 jobs to complete instead of 1 if args.load: ready_requirement = [ SshJob( node=faraday, commands=[ Run('rhubarbe load -i ubuntu', hostnamea, hostnameb), Run('rhubarbe wait', hostnamea, hostnameb), ], scheduler=scheduler, ), SshJob( node=faraday, commands=[ Run('rhubarbe bye -a', '~' + hostnamea, '~' + hostnameb) ], scheduler=scheduler, ) ] ########## # setting up the data interface on both fit01 and fit02
username="******", verbose=verbose_ssh, formatter=TimeColonFormatter()) ########## # create an orchestration scheduler scheduler = Scheduler() ########## check_lease = SshJob( # checking the lease is done on the gateway node=faraday, # this means that a failure in any of the commands # will cause the scheduler to bail out immediately critical=True, command=Run("rhubarbe leases --check"), scheduler=scheduler, ) ########## # setting up the wireless interface on both fit01 and fit02 init_node_01 = SshJob( node=node1, command=RunScript( "B3-wireless.sh", "init-ad-hoc-network", wireless_driver, "foobar", 2412, ), required=check_lease,