Пример #1
0
def experimentCleanup(container_name):
    '''Tear down an aborted experiment for the named container.

    Calls perturbs.stopChaos, stops monitoring and finally stops the
    container itself, then reports the abort on stdout.
    '''
    # Perturbation and monitoring are torn down before the container itself
    # is stopped.
    perturbs.stopChaos(container_name)
    target = container_api.getContainer(container_name)
    monitoring.stopMonitoring(target)
    target.stop()
    print('🦀🦀🦀 experiments aborted, cleaned up')
Пример #2
0
def premade_external(name, cmd, pid=None):
    '''Apply a premade perturbation to the container with the given name.

    Args:
        name: Name of the container to perturb.
        cmd: The premade fault to apply; must be truthy.
        pid: Optional value forwarded unchanged to sysfault.applyFault
            (presumably the process id to target -- confirm against
            sysfault).

    Raises:
        SystemExit: If cmd is empty or None. The exit status is left
            unset, exactly as with the previous bare exit() call.
    '''
    # Validate first so a bad command fails fast, before the container API
    # is touched at all.
    if not cmd:
        # Wrapping in a 1-tuple keeps the formatting safe even when cmd is
        # itself an empty tuple.
        print('Invalid cmd %s' % (cmd,))
        # exit() is a helper injected by the site module for interactive
        # sessions and is not guaranteed to exist; raising SystemExit is the
        # portable equivalent.
        raise SystemExit

    container = container_api.getContainer(name)
    sysfault.applyFault(container, cmd, pid)
    print('Started sysfault %s on %s' % (cmd, container.name))
Пример #3
0
def start(name):
    '''Begin monitoring the named container unless it is already monitored.'''
    # Resolve the container up front, before the already-monitored check
    # (presumably this fails early for an unknown name -- confirm against
    # container_api).
    target = container_api.getContainer(name)

    already_monitored = container_api.hasMonitoring(name)
    if not already_monitored:
        monitoring.startMonitoring(target)
    else:
        print('Container %s already has monitoring' % name)
Пример #4
0
def start(name, delay_enter, delay_exit, error, signal, syscall, when):
    '''Build a syscall fault from the given options and apply it.

    All options except ``name`` are passed through unchanged as keyword
    arguments to sysfault.Fault; the resulting fault is applied to the
    container without a specific pid.
    '''
    target = container_api.getContainer(name)
    fault_options = {
        'delay_enter': delay_enter,
        'delay_exit': delay_exit,
        'error': error,
        'signal': signal,
        'syscall': syscall,
        'when': when,
    }
    fault = sysfault.Fault(**fault_options)
    # None: no pid is handed to applyFault here.
    sysfault.applyFault(target, fault, None)
    print('Started sysfault %s on %s' % (fault, target.name))
Пример #5
0
def stop(name):
    '''Look up the named container and stop monitoring it.'''
    monitored = container_api.getContainer(name)
    monitoring.stopMonitoring(monitored)
Пример #6
0
def stopChaos(name):
    '''Clear all sysfaults from the named container and report it on stdout.'''
    target = container_api.getContainer(name)
    sysfault.clearFaults(target)
    # The container's own name attribute is reported, not the argument.
    print('Stopped sysfault on %s' % target.name)
Пример #7
0
def start(name, exp_time, pid_name, start_cmd, stop_cmd):
    '''Run every premade perturbation as an experiment against one container.

    For each perturbation the workload is started with ``start_cmd``,
    monitored through three phases of ``exp_time`` each (baseline,
    perturbation, baseline), after which the collected metrics are written
    as CSV files to ``<cwd>/<name>_exp/<index>/`` and the workload is
    stopped again. An already existing experiment directory is resumed:
    indices that already have an output folder are skipped.

    Args:
        name: Container name; used for the container lookup, the metric
            queries and as prefix of the experiment directory.
        exp_time: Duration handed to printSleep for each of the three
            phases (presumably seconds -- confirm against printSleep).
        pid_name: Process name used to look up the pid to monitor and the
            pid to perturb.
        start_cmd: Command line, split on single spaces, that starts the
            workload before every experiment.
        stop_cmd: Command line, split on single spaces, that is run after
            every experiment; skipped when empty.
    '''
    container_name = name
    perturbations = perturbs.getPremadeFaults()
    length = len(perturbations)
    enumerated_perturbations = enumerate(perturbations)
    container_to_cleanup.append(container_name)
    signal.signal(signal.SIGTERM, signalCleanup)
    signal.signal(signal.SIGINT, signalCleanup)

    #0. Create experiment directory (relative to the current directory).
    realpath = os.path.realpath('')
    experiment_dir = '%s/%s' % (realpath, container_name + '_exp')
    completed = set()
    try:
        os.mkdir(experiment_dir)
    except FileExistsError:
        # Existing experiment run found: continue from the first index that
        # has no output folder yet.
        completed = _completedIndices(experiment_dir)
        newest = _firstMissingIndex(completed, length)
        # Skip perturbations already completed.
        enumerated_perturbations = enumerate(perturbations[newest:], newest)
        if newest < length:
            print('🦀 Existing experiment detected, continuing from %s/%s' %
                  (newest, perturbations[newest]))
        else:
            # Every index has an output folder; the loop below is a no-op.
            print('🦀 Existing experiment detected, all %d perturbations '
                  'already done' % length)

    #1. select perturbation & start monitoring
    # Stays None when no experiment actually ran (everything was skipped).
    container = None
    for index, p in enumerated_perturbations:
        print('🦀🦀🦀 %d/%d running experiment perturbation %s' %
              (index + 1, length, p))
        if index in completed:
            # Skip this one as output folder already exists.
            print('🦀 already done, skipping')
            continue

        # A) start container.
        start_proc = runCmd(start_cmd.split(' '))
        # B) assume we need to wait a bit for the above to start.
        time.sleep(20)
        # C) start monitoring
        container = container_api.getContainer(container_name)
        # first process, second field is the pid.
        pid_to_monitor = container_api.getProcessesByNameExternal(
            container_name, pid_name)[0][1]
        monitoring.startMonitoring(container, pid_to_monitor)

        start_time = currentTimeS()

        #2. wait predetermined amount of time.
        printSleep(exp_time, info_str='for baseline#1')

        #3. start perturbation.
        # first process, second field is the pid. Note this uses the
        # *Local* lookup while monitoring uses the *External* one.
        pid_to_perturb = container_api.getProcessesByNameLocal(
            container_name, pid_name)[0][1]
        perturbs.premade_external(container_name, p, pid=pid_to_perturb)

        #4. wait predetermined amount of time.
        printSleep(exp_time, info_str='for perturbation')

        #5. stop perturbation.
        print('🦀 finished perturbing')
        perturbs.stopChaos(container_name)
        printSleep(exp_time, info_str='for baseline#2')

        #6. log results to files for future processing.
        print('🦀 logging files')
        end_time = currentTimeS()
        time_span = end_time - start_time

        output_dir = '%s/%s' % (experiment_dir, index)
        os.mkdir(output_dir)  #create output directory.
        filename = '%s/%s' % (output_dir, p)
        _writeMetricFiles(name, end_time, time_span, filename)

        # D) Stop monitoring
        monitoring.stopMonitoring(container)
        # E) Stop command
        start_proc.kill()
        # Truthiness instead of an identity check against '' (which only
        # worked by accident of string interning).
        if stop_cmd:
            print(stop_cmd)
            stop_proc = runCmd(stop_cmd.split(' '))
            stop_proc.wait()

        #7. if not done, goto #2 and repeat #2-#7
        # loop, so happen by default.

    # Only stop monitoring when at least one experiment ran; otherwise there
    # is no container object to stop.
    if container is not None:
        print('🦀 stopping monitoring')
        monitoring.stopMonitoring(container)
    print('🦀🦀🦀 experiments completed')


def _completedIndices(experiment_dir):
    '''Return the set of experiment indices that already have an output folder.'''
    # Only purely numeric names are experiment folders; stray entries such as
    # hidden files are ignored instead of crashing int().
    return {
        int(entry)
        for entry in os.listdir(experiment_dir)
        if entry.isdecimal()
    }


def _firstMissingIndex(completed, total):
    '''Return the lowest index in range(total) not in completed, else total.'''
    return next((i for i in range(total) if i not in completed), total)


def _writeMetricFiles(name, end_time, time_span, filename):
    '''Write one CSV per metric query to <filename>_<suffix>.csv.'''
    queries = (
        ('syscall', metrics.syscallQuery),      # SYSCALL
        ('nethttp', metrics.networkQuery),      # NETWORK HTTP
        ('cpu', metrics.cpuQuery),              # CPU
        ('mem', metrics.memQuery),              # RAM
        ('netrec', metrics.netreceiveQuery),    # NETWORK RECEIVE
        ('netsend', metrics.nettransmitQuery),  # NETWORK TRANSMIT
        ('io', metrics.ioQuery),                # DISK IO
        ('latency', metrics.latencyQuery),      # HTTP LATENCY
    )
    for suffix, query in queries:
        with open('%s_%s.csv' % (filename, suffix), 'w',
                  newline='') as csv_file:
            query(name, end_time, timespan=time_span, csvfile=csv_file)