def generate(self, pool_period, max_flow_age, threading_mode, shm_size,
             aeron_lib_path, aeron_term_buffer_length,
             aeron_ipc_term_buffer_length, bw_emulation=True):
        """Print the complete compose description of the experiment.

        The number of cluster nodes is read through the Docker API client and
        handed to ``print_bootstrapper`` together with the tuning parameters
        received here. The header, the bootstrapper, every service of the
        graph, the configs and the networks are then printed in that order.

        If the node listing cannot be obtained, a hint is reported through
        ``print_error_named`` and the exception is passed to
        ``print_and_fail``.
        """
        god_count = 0
        try:
            swarm_api = docker.APIClient(base_url='unix:/' + DOCKER_SOCK)
            god_count = len(swarm_api.nodes())
        except Exception as error:
            hint = (
                "DockerComposeFileGenerator.py requires special permissions in order to view cluster state.\n"
                "please, generate the .yaml file on a manager node.")
            print_error_named("compose_generator", hint)
            print_and_fail(error)

        self.print_header()
        self.print_bootstrapper(
            god_count,
            pool_period,
            max_flow_age,
            threading_mode,
            shm_size,
            aeron_lib_path,
            aeron_term_buffer_length,
            aeron_ipc_term_buffer_length,
            bw_emulation,
        )
        for service_name in self.graph.services:
            self.print_service(self.graph.services[service_name])
        self.print_configs()
        self.print_networks()
示例#2
0
 def calculate_end_to_end_properties(self):
     """Fold the properties of every link in ``self.links`` into path totals.

     After the call the following attributes are set on ``self``:

     * ``max_bandwidth`` - the smallest ``bandwidth_bps`` among the links,
       or ``None`` when there are no links
     * ``latency`` - sum of the link latencies
     * ``jitter`` - square root of the summed squared link jitters
     * ``drop`` - probability that a packet is dropped on at least one link
     * ``RTT`` - twice the latency

     Link data that cannot be processed is reported through
     ``print_and_fail``.
     """
     total_not_drop_probability = 1.0
     self.max_bandwidth = None
     self.latency = 0
     self.jitter = 0
     for link in self.links:
         try:
             # The path is only as fast as its slowest link
             if self.max_bandwidth is None or link.bandwidth_bps < self.max_bandwidth:
                 self.max_bandwidth = link.bandwidth_bps
             # Accumulate jitter by summing the variances
             self.jitter = sqrt((self.jitter * self.jitter) + (link.jitter * link.jitter))
             # Latency is just a sum
             self.latency += float(link.latency)
             # Probability of surviving the whole path is the product of the
             # per-link survival probabilities; it is inverted after the loop
             # (same idea as "at least one 6 in several dice rolls").
             total_not_drop_probability *= (1.0 - float(link.drop))
         except Exception:
             # Exception (not a bare except) so that SystemExit and
             # KeyboardInterrupt are never swallowed; every field goes through
             # str() so that building the message cannot itself fail on a
             # non-string value and hide the real problem.
             print_and_fail("Provided link data is not valid: "
                 + str(link.latency) + "ms "
                 + str(link.drop) + "drop rate "
                 + str(link.bandwidth))
     self.RTT = self.latency * 2
     self.drop = (1.0 - total_not_drop_probability)
示例#3
0
 def new_bridge(self, name):
     """Create a bridge called *name* and register it under ``self.bridges``.

     The bridge is only registered when ``get_nodes(name)`` finds nothing
     with that name; otherwise the clash is reported via ``print_and_fail``.
     The freshly created bridge object is returned in either case.
     """
     bridge = NetGraph.Bridge(name)
     name_is_taken = len(self.get_nodes(name)) != 0
     if name_is_taken:
         print_and_fail("Cant add bridge with name: " + name + ". Another node with the same name already exists")
     else:
         # each entry is stored as a single-element list
         self.bridges[name] = [bridge]
     return bridge
示例#4
0
    def start_aeron_media_driver(self):
        """Launch the Aeron media driver process unless emulation is disabled.

        Nothing is started when the ``RUNTIME_EMULATION`` environment
        variable is set to ``'false'``. The spawned process handle is kept
        in ``self.aeron_media_driver``; a failure to spawn is reported and
        handed to ``print_and_fail``.
        """
        emulation_disabled = getenv('RUNTIME_EMULATION', 'true') == 'false'
        if emulation_disabled:
            return

        try:
            self.aeron_media_driver = Popen('/usr/bin/Aeron/aeronmd')
            print_named("god", "started aeron_media_driver.")
        except Exception as error:
            print_error("[Py (god)] failed to start aeron media driver.")
            print_and_fail(error)
示例#5
0
 def bandwidth_in_bps(self, bandwidth_string):
     """Convert a bandwidth description such as ``"10Mbps"`` into bits per second.

     The string is matched against ``self.bandwidth_re``; the first captured
     group is the numeric base and the second the unit prefix (K, M or G,
     decimal multiples of 1000). A string that does not match is reported
     via ``print_and_fail``. ``None`` is returned for any other prefix.
     """
     pattern = self.bandwidth_re
     if re.match(pattern, bandwidth_string) is None:
         print_and_fail("Bandwidth is not properly specified, accepted values must be: [0-9]+[KMG]bps")
     first_hit = re.findall(pattern, bandwidth_string)[0]
     digits, unit = first_hit[0], first_hit[1]
     factors = {
         'K': 1000,
         'M': 1000 * 1000,
         'G': 1000 * 1000 * 1000,
     }
     if unit not in factors:
         return None
     return int(digits) * factors[unit]
示例#6
0
def main():
    """Entry point of the emulation core.

    ``sys.argv[1]`` is the topology file; ``sys.argv[2]`` and ``sys.argv[3]``
    are forwarded to ``setup_container``. The topology is parsed into a
    ``NetGraph``, hostnames are resolved, this instance is located in the
    graph by its own IP, shortest paths and the dynamic schedule are
    computed, and the ``EmulationCore`` is initialised. The emulation loop is
    entered unless the ``RUNTIME_EMULATION`` environment variable is
    ``'false'``.
    """
    if len(sys.argv) < 4:
        # NOTE(review): three arguments are consumed below, yet the usage text
        # names only two - confirm the third one's name and extend the message.
        print_and_fail("Missing arguments. emucore <topology> <container id>")
    # Bound unconditionally so the name always exists past the argument check.
    topology_file = sys.argv[1]
    # For future reference: This topology file must not exceed 512KB otherwise docker refuses
    # to copy it as a config file, this has happened with the 2k scale-free topology...

    setup_container(sys.argv[2], sys.argv[3])

    # Because of the bootstrapper hack we cant get output from the emucore through standard docker logs...
    #sys.stdout = open("/var/log/need.log", "w")
    #sys.stderr = sys.stdout

    graph = NetGraph()

    parser = XMLGraphParser(topology_file, graph)
    parser.fill_graph()
    print_message("Done parsing topology")

    print_message("Resolving hostnames...")
    graph.resolve_hostnames()
    print_message("All hosts found!")

    print_message("Determining the root of the tree...")
    # Get our own ip address and set the root of the "tree"
    ownIP = get_own_ip(graph)
    # NOTE(review): if hosts_by_ip is a plain dict, an unknown address raises
    # KeyError here and the None check below is never reached - confirm.
    graph.root = graph.hosts_by_ip[ip2int(ownIP)]

    if graph.root is None:
        print_and_fail(
            "Failed to identify current service instance in topology!")
    print_message("We are " + graph.root.name + "@" + ownIP)

    print_identified(graph, "Calculating shortest paths...")
    graph.calculate_shortest_paths()

    print_message("Parsing dynamic event schedule...")
    scheduler = parser.parse_schedule(graph.root, graph)

    # sys.exit is the supported way to terminate a program; the bare `exit`
    # name is injected by the `site` module and is not guaranteed to exist.
    signal(SIGTERM, lambda signum, frame: sys.exit(0))

    print_message("Initializing network emulation...")
    manager = EmulationCore(graph, scheduler)
    manager.initialize()
    print_identified(graph, "Waiting for command to start experiment")
    sys.stdout.flush()
    sys.stderr.flush()

    if getenv('RUNTIME_EMULATION', 'true') != 'false':
        # Enter the emulation loop
        manager.emulation_loop()
    def print_networks(self):
        """Print the ``networks:`` section of the generated file.

        All links of the graph must share the network of the first link;
        any link on a different network is reported via ``print_and_fail``.
        The shared network name is emitted as the external name of
        ``KollapsNet``, followed by an overlay network called ``outside``
        and a trailing blank line.
        """
        all_links = self.graph.links
        network = all_links[0].network
        for candidate in all_links:
            if candidate.network != network:
                print_and_fail(
                    "Multiple network support is not yet implemented!")

        # print() appends the final newline of each chunk
        print("networks:\n  KollapsNet:\n    external:")
        print("      name: " + network)
        print("  outside:\n    driver: overlay\n")
示例#8
0
    def calculate_shortest_paths(self):
        """Compute a shortest path (in hops) from ``self.root`` to every node.

        Every host of every service and the first entry of every bridge is
        considered. Results are stored in ``self.paths`` (node -> Path) and
        ``self.paths_by_id`` (path id -> Path); ``self.path_counter`` supplies
        the ids and is advanced for every Path object created. The root
        itself gets a Path with no links.

        A missing root is reported via ``print_and_fail``.
        """
#        start = time()
        # Dijkstra's shortest path implementation
        # Distance is number of hops
        if self.root is None:
            print_and_fail("Root of the tree has not been defined.")

        inf = float("inf")
        dist = {}
        # Q is the work list of [distance, node] entries; entries are mutable
        # lists so that a priority can be updated in place further down.
        Q = []
        for service in self.services:
            hosts = self.services[service]
            for host in hosts:
                # only the root starts at distance 0, everything else at infinity
                distance = 0
                if host != self.root:
                    distance = inf
                entry = [distance, host]
                Q.append(entry)
                dist[host] = distance
        for bridge in self.bridges:
            # bridges are stored as lists; only the first element takes part
            b = self.bridges[bridge][0]
            Q.append([inf, b])
            dist[b] = inf

        # the root reaches itself through an empty path
        self.paths[self.root] = NetGraph.Path([], self.path_counter)
        self.paths_by_id[self.path_counter] = self.paths[self.root]
        self.path_counter += 1
        while len(Q) > 0:
            # list.sort is stable, so entries with equal distance keep their
            # relative order in Q
            Q.sort(key=lambda ls: ls[0])
            u = Q.pop(0)[1]  # type: NetGraph.Node
            for link in u.links:
                # every link costs exactly one hop
                alt = dist[u] + 1
                if link.destination in dist: # if destination is a bridge, it could have been removed
                    if alt < dist[link.destination]:
                        node = link.destination
                        dist[node] = alt
                        # append to the previous path
                        path = self.paths[u].links[:]
                        path.append(link)
                        # a new Path (with a new id) is registered for the node
                        self.paths[node] = NetGraph.Path(path, self.path_counter)
                        self.paths_by_id[self.path_counter] = self.paths[node]
                        self.path_counter += 1
                        for e in Q:  # find the node in Q and change its priority
                            if e[1] == node:
                                e[0] = alt
示例#9
0
 def __init__(self, source, destination, latency, jitter, drop, bandwidth, bps, network):
     """Create a link from *source* to *destination*.

     *latency*, *drop* and *jitter* are converted to ``float``; values that
     cannot be converted are reported via ``print_and_fail``. *bandwidth*
     is kept as given and *bps* is stored as ``bandwidth_bps``. The link
     starts with index 0, an empty flow list and its own lock.
     """
     self.lock = Lock()
     self.index = 0
     self.source = source  # type: NetGraph.Node
     self.destination = destination  # type: NetGraph.Node
     try:
         self.latency = float(latency)
         self.drop = float(drop)
         self.jitter = float(jitter)
     except (TypeError, ValueError, OverflowError):
         # Only the errors float() can raise are handled, and every value is
         # passed through str() so that numeric or None arguments cannot make
         # the message construction itself raise a TypeError.
         print_and_fail("Provided link data is not valid: "
             + str(latency) + "ms "
             + str(drop) + "drop rate "
             + str(bandwidth))
     self.bandwidth = bandwidth  # type: str
     self.bandwidth_bps = bps  # type: int
     self.flows = []  # type: List[Tuple[int, int]]  # (RTT, Bandwidth)
     self.last_flows_count = 0
     self.network = network
示例#10
0
    def parse_services(self, experiment, services):
        """Create the graph services described by the ``<services>`` element.

        :param experiment: root element of the topology, forwarded to
            ``calulate_required_replicas``
        :param services: the ``<services>`` element whose children are parsed

        For every ``<service>`` child the attributes ``name`` and ``image``
        are mandatory; ``command``, ``share``, ``supervisor``, ``port``,
        ``reuse`` and ``replicas`` are optional. One graph service is created
        per required replica, and services flagged as supervisor are also
        collected in ``self.supervisors``. Invalid input is reported via
        ``print_and_fail``.
        """
        for service in services:
            if service.tag != 'service':
                print_and_fail('Invalid tag inside <services>: ' + service.tag)
            # a missing attribute and an empty one are rejected alike
            if not service.attrib.get('name') or not service.attrib.get('image'):
                print_and_fail(
                    'A service needs a name and an image attribute.')

            command = service.attrib.get('command')
            shared = service.attrib.get('share') == "true"
            # reuse is on unless explicitly set to something other than "true"
            reuse = service.attrib.get('reuse', "true") == "true"

            # the mere presence of the attribute marks a supervisor
            supervisor = 'supervisor' in service.attrib
            supervisor_port = 0
            if supervisor and 'port' in service.attrib:
                try:
                    supervisor_port = int(service.attrib['port'])
                except ValueError:
                    print_and_fail('port attribute must be a valid integer.')

            replicas = 1
            if 'replicas' in service.attrib:
                try:
                    replicas = int(service.attrib['replicas'])
                except ValueError:
                    print_and_fail(
                        'replicas attribute must be a valid integer.')
            # the dynamic section may require a different replica count
            replicas = self.calulate_required_replicas(service.attrib['name'],
                                                       replicas, experiment,
                                                       reuse)

            for _ in range(replicas):
                srv = self.graph.new_service(service.attrib['name'],
                                             service.attrib['image'], command,
                                             shared, reuse, replicas)
                if supervisor:
                    self.supervisors.append(srv)
                    srv.supervisor_port = supervisor_port
                    srv.supervisor = True
 def generate(self, pool_period, max_flow_age, threading_mode, shm_size,
              aeron_lib_path, aeron_term_buffer_length,
              aeron_ipc_term_buffer_length, bw_emulation=True):
     """Print the complete Kubernetes description of the experiment.

     The cluster configuration is loaded in-cluster when the
     ``KUBERNETES_SERVICE_HOST`` environment variable is set, and from the
     kube config otherwise. The number of listed nodes is handed to
     ``print_bootstrapper`` together with the tuning parameters received
     here. Roles, bootstrapper and every service are printed with a ``---``
     line after each, followed by the topology.

     Any failure while querying the cluster is passed to ``print_and_fail``.
     """
     god_count = 0
     try:
         inside_cluster = os.getenv('KUBERNETES_SERVICE_HOST')
         if not inside_cluster:
             config.load_kube_config()
         else:
             config.load_incluster_config()

         node_listing = client.CoreV1Api().list_node().to_dict()
         god_count = len(node_listing["items"])
     except Exception as error:
         print_and_fail(error)

     separator = "---"
     self.print_roles()
     print(separator)
     self.print_bootstrapper(
         god_count,
         pool_period,
         max_flow_age,
         threading_mode,
         shm_size,
         aeron_lib_path,
         aeron_term_buffer_length,
         aeron_ipc_term_buffer_length,
         bw_emulation,
     )
     print(separator)
     for service_name in self.graph.services:
         self.print_service(self.graph.services[service_name])
         print(separator)
     self.print_topology()
示例#12
0
 def parse_bridges(self, root):
     """Register every ``<bridge>`` child of *root* in the graph.

     Each child must carry the tag ``bridge`` and a non-empty ``name``
     attribute; violations are reported via ``print_and_fail``. The name
     is handed to ``self.graph.new_bridge``.
     """
     for element in root:
         tag = element.tag
         if tag != 'bridge':
             print_and_fail('Invalid tag inside <bridges>: ' + tag)
         attributes = element.attrib
         if 'name' not in attributes:
             print_and_fail('A bridge needs to have a name.')
         bridge_name = attributes['name']
         if not bridge_name:
             print_and_fail('A bridge needs to have a name.')
         self.graph.new_bridge(bridge_name)
示例#13
0
def main():
    """Entry point of the bootstrapper.

    ``sys.argv[1]`` is the mode, ``sys.argv[2]`` the label and the optional
    ``sys.argv[3]`` the bootstrapper id. The orchestrator is selected through
    the ``KOLLAPS_ORCHESTRATOR`` environment variable (``'swarm'`` when
    unset); unknown values fall back to Docker Swarm after a notice. Any
    exception is printed, followed by a 20 second pause.
    """
    try:
        if len(sys.argv) < 3:
            # the two sentences used to be glued together ("stop.This")
            msg = "If you are calling " + sys.argv[
                0] + " from your workstation stop. "
            msg += "This should only be used inside containers."

            # NOTE(review): presumably the pause keeps the container around
            # long enough for the message to be noticed - confirm.
            sleep(20)

            print_and_fail(msg)

        mode = sys.argv[1]
        label = sys.argv[2]
        bootstrapper_id = sys.argv[3] if len(sys.argv) > 3 else None

        orchestrator = os.getenv('KOLLAPS_ORCHESTRATOR', 'swarm')

        if orchestrator == 'kubernetes':
            bootstrapper = KubernetesBootstrapper()

        elif orchestrator == 'swarm':
            bootstrapper = SwarmBootstrapper()

        # insert here any other bootstrappping class required by new orchestrators
        else:
            print_named(
                "bootstrapper",
                "Unrecognized orchestrator. Using default: Docker Swarm.")
            bootstrapper = SwarmBootstrapper()

        bootstrapper.bootstrap(mode, label, bootstrapper_id)

    except Exception as e:
        # flush regular output first so it is not interleaved with the error
        sys.stdout.flush()
        print_error(e)
        sleep(20)
示例#14
0
    def __init__(self, flow_collector, graph, event_scheduler, ip=None):
        """Set up flow exchange through the native Aeron library and the dashboard socket.

        :param flow_collector: stored as ``self.flow_collector``
        :param graph: NetGraph describing the topology
        :param event_scheduler: EventScheduler stored as ``self.scheduler``
        :param ip: optional address used to derive the Aeron id; the ip of
            the graph root is used when omitted

        The constructor loads the shared library found at ``AERON_LIB_PATH``,
        registers ``self.receive_flow`` as its callback, binds a TCP socket
        on ``CommunicationsManager.TCP_PORT``, starts a daemon thread running
        ``self.receive_dashboard_commands``, feeds the contents of
        ``LOCAL_IPS_FILE`` and ``REMOTE_IPS_FILE`` to the library and finally
        starts polling. A topology with more links than ``SHORT_LIMIT`` is
        reported via ``print_and_fail``.
        """
        self.graph = graph  # type: NetGraph
        self.scheduler = event_scheduler  # type: EventScheduler
        self.flow_collector = flow_collector
        # counters, all starting from zero (largest gap starts at -1)
        self.produced = 0
        self.received = 0
        self.consumed = 0
        self.largest_produced_gap = -1
        self.stop_lock = Lock()

        self.aeron_lib = None
        self.aeron_id = None
        self.local_ips = {}
        self.remote_ips = {}

        # Pick the struct format used for a link id from the number of
        # links: "B" is an unsigned char, "H" an unsigned short.
        link_count = len(self.graph.links)
        if link_count <= BYTE_LIMIT:
            self.link_unit = "1B"
        elif link_count <= SHORT_LIMIT:
            self.link_unit = "1H"
        else:
            print_and_fail("Topology has too many links: " + str(link_count))
        self.link_size = struct.calcsize("<" + self.link_unit)

        self.supervisor_count = 0
        self.peer_count = 0

        if ip is None:
            self.aeron_id = self.graph.root.ip
        else:
            self.aeron_id = ip2int(ip)
            # self.aeron_id = ip2int(socket.gethostbyname(socket.gethostname()))

        # peers are all hosts except the root; supervisors are counted
        # separately and subtracted from the peer count afterwards
        for service in self.graph.services:
            hosts = self.graph.services[service]
            for host in hosts:
                if host != self.graph.root:
                    self.peer_count += 1

                if host.supervisor:
                    self.supervisor_count += 1
        self.peer_count -= self.supervisor_count

        # setup python callback
        self.aeron_lib = ctypes.CDLL(AERON_LIB_PATH)

        # NOTE(review): the boolean passed to init presumably selects the
        # 16-bit link id variant, matching addFlow8/addFlow16 - confirm
        # against the native library.
        if link_count <= BYTE_LIMIT:
            self.aeron_lib.init(self.aeron_id, False)
            self.flow_adding_func = self.aeron_lib.addFlow8

        else:
            self.aeron_lib.init(self.aeron_id, True)
            self.flow_adding_func = self.aeron_lib.addFlow16

        # C callback signature: returns a void pointer and receives an
        # unsigned long, an unsigned int and a pointer to unsigned int
        CALLBACKTYPE = CFUNCTYPE(c_voidp, c_ulong, c_uint, POINTER(c_uint))
        c_callback = CALLBACKTYPE(self.receive_flow)
        self.callback = c_callback  # keep reference so it does not get garbage collected
        self.aeron_lib.registerCallback(self.callback)

        # TCP socket for dashboard commands, bound on all interfaces
        self.dashboard_socket = socket.socket(socket.AF_INET,
                                              socket.SOCK_STREAM)
        self.dashboard_socket.bind(('0.0.0.0', CommunicationsManager.TCP_PORT))

        # daemon thread, so it does not keep the process alive on exit
        self.dashboard_thread = Thread(target=self.receive_dashboard_commands)
        self.dashboard_thread.daemon = True
        self.dashboard_thread.start()

        # TODO PG run through this again, rename variables to match new god logs functionality
        # collect, without duplicates, the index of the first link of every
        # non-empty path
        my_starting_links = []
        for key, path in self.graph.paths_by_id.items():
            if len(path.links
                   ) > 0 and path.links[0].index not in my_starting_links:
                my_starting_links.append(path.links[0].index)

        # every line of the file is parsed as an integer and registered
        # together with the starting links (passed as a C array of c_uint)
        with open(LOCAL_IPS_FILE, 'r') as file:
            for line in file.readlines():
                self.aeron_lib.addLocalSubs(
                    int(line), len(my_starting_links),
                    (c_uint * len(my_starting_links))(*my_starting_links))

        # every line of the file is parsed as an integer and registered
        with open(REMOTE_IPS_FILE, 'r') as file:
            for line in file.readlines():
                self.aeron_lib.addRemoteSubs(int(line))

        self.aeron_lib.startPolling()
示例#15
0
    def parse_schedule(self, service, graph):
        """
        Build the schedule of dynamic events that concern this instance.

        The ``<dynamic>`` block of the topology file is read. Events naming a
        bridge and events describing a link (``origin``/``dest``) are
        scheduled against *graph*. Events naming *service* are replayed for
        all replicas with a fixed random seed, and only the ones that land on
        ``service.replica_id`` are scheduled. Without a ``<dynamic>`` block,
        or without any join event for the service, a join at time 0.0 is
        scheduled.

        Malformed input is reported via ``print_and_fail``.

        :param service: NetGraph.Service this instance belongs to
        :param graph: NetGraph that bridge and link events are scheduled against
        :return: the populated EventScheduler
        """
        XMLtree = ET.parse(self.file)
        root = XMLtree.getroot()
        if root.tag != 'experiment':
            print_and_fail(
                'Not a valid Kollaps topology file, root is not <experiment>')

        dynamic = None

        for child in root:
            if child.tag == 'dynamic':
                if dynamic is not None:
                    print_and_fail("Only one <dynamic> block is allowed.")
                dynamic = child

        scheduler = EventScheduler()
        first_join = -1.0
        first_leave = float('inf')

        # if there is no dynamic block than this instance joins straight away
        if dynamic is None:
            scheduler.schedule_join(0.0)
            return scheduler

        # Fixed seed: the sequence of randrange() calls below is therefore
        # reproducible, so the order of those calls must not be changed.
        seed(12345)
        # one [Joined, Disconnected, Used] state triple per replica
        replicas = [[False, False, False]
                    for _ in range(service.replica_count)]

        # indexes for replicas entries
        JOINED = 0
        DISCONNECTED = 1
        USED = 2

        # there is a dynamic block, so check if there is anything scheduled for us
        for event in dynamic:
            if event.tag != 'schedule':
                print_and_fail("Only <schedule> is allowed inside <dynamic>")

            # parse time of event; a missing attribute is left to the
            # attribute checks below, which report it with a proper message
            # instead of failing here with a KeyError
            time = 0.0
            if 'time' in event.attrib:
                try:
                    time = float(event.attrib['time'])
                except ValueError:
                    print_and_fail("time attribute must be a valid real number")
                if time < 0.0:
                    print_and_fail("time attribute must be a positive number")

            if 'name' in event.attrib and 'time' in event.attrib and 'action' in event.attrib:
                node_name = event.attrib['name']

                # if a bridge is scheduled
                if node_name in graph.bridges or node_name in graph.removed_bridges:
                    if event.attrib['action'] == 'join':
                        scheduler.schedule_bridge_join(time, graph, node_name)
                    elif event.attrib['action'] == 'leave':
                        scheduler.schedule_bridge_leave(time, graph, node_name)
                    continue

                # parse name of service. only process actions that target us
                if node_name != service.name:
                    continue

                # parse amount of replicas affected
                amount = 1
                if 'amount' in event.attrib:
                    amount = int(event.attrib['amount'])

                # parse action
                if event.attrib['action'] == 'join':
                    for _ in range(amount):
                        available = False
                        picked = 0
                        # Pick a random replica
                        while not available:
                            picked = randrange(0, service.replica_count)
                            available = not replicas[picked][JOINED]
                            if not service.reuse_ip:
                                available = available and not replicas[picked][USED]

                        # Mark the state
                        replicas[picked][JOINED] = True
                        if not service.reuse_ip:
                            replicas[picked][USED] = True

                        # if its us, schedule the action
                        if service.replica_id == picked:
                            scheduler.schedule_join(time)
                            print_message(service.name + " replica " +
                                          str(service.replica_id) +
                                          " scheduled to join at " + str(time))
                        if first_join < 0.0:
                            first_join = time

                elif event.attrib['action'] == 'leave' or event.attrib[
                        'action'] == 'crash':
                    for _ in range(amount):
                        up = False
                        picked = 0
                        # Pick a random replica
                        while not up:
                            picked = randrange(0, service.replica_count)
                            up = replicas[picked][JOINED]

                        # Mark the state
                        replicas[picked][JOINED] = False

                        # if its us, schedule the action
                        if service.replica_id == picked:
                            if event.attrib['action'] == 'leave':
                                scheduler.schedule_leave(time)
                                print_message(service.name + " replica " +
                                              str(service.replica_id) +
                                              " scheduled to leave at " +
                                              str(time))
                            elif event.attrib['action'] == 'crash':
                                scheduler.schedule_crash(time)
                                print_message(service.name + " replica " +
                                              str(service.replica_id) +
                                              " scheduled to crash at " +
                                              str(time))
                        if first_leave > time:
                            first_leave = time

                elif event.attrib['action'] == 'reconnect':
                    for _ in range(amount):
                        disconnected = False
                        picked = 0
                        # Pick a random replica
                        while not disconnected:
                            picked = randrange(0, service.replica_count)
                            disconnected = replicas[picked][DISCONNECTED]

                        # Mark the state
                        replicas[picked][DISCONNECTED] = False

                        # if its us, schedule the action
                        if service.replica_id == picked:
                            print_message(service.name + " replica " +
                                          str(service.replica_id) +
                                          " scheduled to reconnect at " +
                                          str(time))
                            scheduler.schedule_reconnect(time)

                elif event.attrib['action'] == 'disconnect':
                    for _ in range(amount):
                        connected = False
                        picked = 0
                        # Pick a random replica
                        while not connected:
                            picked = randrange(0, service.replica_count)
                            connected = replicas[picked][
                                JOINED] and not replicas[picked][DISCONNECTED]

                        # Mark the state
                        replicas[picked][DISCONNECTED] = True

                        # if its us, schedule the action
                        if service.replica_id == picked:
                            print_message(service.name + " replica " +
                                          str(service.replica_id) +
                                          " scheduled to disconnect at " +
                                          str(time))
                            scheduler.schedule_disconnect(time)
                else:
                    print_and_fail(
                        "Unrecognized action: " + event.attrib['action'] +
                        " , allowed actions are join, leave, crash, disconnect, reconnect"
                    )

            #Do something dynamically with a link
            elif 'origin' in event.attrib and 'dest' in event.attrib and 'time' in event.attrib:

                #parse origin and destination
                origin = event.attrib['origin']
                destination = event.attrib['dest']

                if 'action' in event.attrib:  #link is joining or leaving
                    if event.attrib['action'] == 'leave':
                        scheduler.schedule_link_leave(time, graph, origin,
                                                      destination)
                    elif event.attrib['action'] == 'join':
                        #Link is already defined but has been removed before
                        if 'upload' not in event.attrib or 'latency' not in event.attrib:
                            scheduler.schedule_link_join(
                                time, graph, origin, destination)
                        #A completely new link needs a network as well; this
                        #check used to be unreachable, so a missing network
                        #surfaced as a KeyError further down
                        elif 'network' not in event.attrib:
                            print_and_fail(
                                "Link description incomplete. For a new link, you must provide at least latency, upload, and network attributes."
                            )
                        else:
                            bandwidth = event.attrib['upload']
                            latency = float(event.attrib['latency'])
                            drop = 0
                            if 'drop' in event.attrib:
                                drop = float(event.attrib['drop'])
                            jitter = 0
                            if 'jitter' in event.attrib:
                                jitter = float(event.attrib['jitter'])
                            network = event.attrib['network']

                            scheduler.schedule_new_link(
                                time, graph, origin, destination, latency,
                                jitter, drop, bandwidth, network)
                            # an optional download bandwidth adds the link
                            # in the opposite direction
                            if 'download' in event.attrib:
                                bandwidth = event.attrib['download']
                                scheduler.schedule_new_link(
                                    time, graph, destination, origin, latency,
                                    jitter, drop, bandwidth, network)

                    else:
                        print_and_fail("Unrecognized action for link: " +
                                       event.attrib['action'] +
                                       ", allowed are join and leave")

                else:  #properties of link are changing
                    # -1 is passed for every property the event does not set
                    bandwidth = -1
                    if 'upload' in event.attrib:
                        bandwidth = graph.bandwidth_in_bps(
                            event.attrib['upload'])
                    latency = -1
                    if 'latency' in event.attrib:
                        latency = float(event.attrib['latency'])
                    drop = -1
                    if 'drop' in event.attrib:
                        drop = float(event.attrib['drop'])
                    jitter = -1
                    if 'jitter' in event.attrib:
                        jitter = float(event.attrib['jitter'])

                    scheduler.schedule_link_change(time, graph, origin,
                                                   destination, bandwidth,
                                                   latency, jitter, drop)

            else:
                print_and_fail(
                    '<schedule> must have either name, time and action attributes,'
                    + ' or link origin dest and properties attributes')

        # deal with auto join
        if first_join < 0.0:
            print_message(service.name + " scheduled to join at " + str(0.0))
            scheduler.schedule_join(0.0)
        if first_leave < first_join:
            print_and_fail("Dynamic: service " + service.name +
                           " leaves before having joined")

        scheduler.schedule_graph_changes()

        return scheduler
示例#16
0
    def fill_graph(self):
        """Populate ``self.graph`` from the topology file ``self.file``.

        The root element must be ``<experiment>`` and carry a ``boot``
        attribute, which becomes ``self.graph.bootstrapper``. At most one
        ``<services>``, ``<bridges>`` and ``<links>`` block is accepted;
        ``<dynamic>`` is skipped here and any other tag is rejected.
        Services are parsed first, then bridges (optional), then links.
        Finally every collected supervisor is registered in the graph.

        Violations are reported via ``print_and_fail``.
        """
        root = ET.parse(self.file).getroot()
        if root.tag != 'experiment':
            print_and_fail(
                'Not a valid Kollaps topology file, root is not <experiment>')

        if 'boot' not in root.attrib:
            print_and_fail(
                '<experiment boot="?"> The experiment needs a valid boostrapper image name'
            )

        self.graph.bootstrapper = root.attrib['boot']

        # top-level blocks that may appear at most once
        sections = {'services': None, 'bridges': None, 'links': None}
        for child in root:
            tag = child.tag
            if tag in sections:
                if sections[tag] is not None:
                    print_and_fail("Only one <" + tag + "> block is allowed.")
                sections[tag] = child
            elif tag != 'dynamic':
                print_and_fail('Unknown tag: ' + tag)

        # Links must be parsed last
        if sections['services'] is None:
            print_and_fail("No services declared in topology description")
        self.parse_services(root, sections['services'])
        if sections['bridges'] is not None:
            self.parse_bridges(sections['bridges'])
        if sections['links'] is None:
            print_and_fail("No links declared in topology descritpion")
        self.parse_links(sections['links'])

        for supervisor in self.supervisors:
            self.graph.set_supervisor(supervisor)
示例#17
0
    def calulate_required_replicas(self, service, hardcoded_count, root,
                                   reuse):
        """Work out how many replicas of ``service`` the experiment needs.

        Looks for a single ``<dynamic>`` child of ``root`` and, inside it, for
        the ``<schedule>`` events whose ``name`` attribute equals ``service``.
        Events are replayed in time order to track how many replicas are
        joined at any moment.

        Args:
            service: Service name matched against each event's ``name``.
            hardcoded_count: Returned unchanged when ``root`` has no
                ``<dynamic>`` block, or when that block contains no ``join``
                event for ``service``.
            root: Iterable of XML elements; children are inspected by ``tag``.
            reuse: When truthy, return the peak number of simultaneously
                joined replicas; otherwise return the total number of joins.

        Returns:
            ``hardcoded_count``, the peak replica count, or the cumulative
            join count, as described above.

        Malformed attributes and inconsistent schedules are reported through
        ``print_and_fail``.
        """
        dynamic = None
        for child in root:
            if child.tag == 'dynamic':
                if dynamic is not None:
                    print_and_fail("Only one <dynamic> block is allowed.")
                dynamic = child

        if dynamic is None:
            return hardcoded_count

        JOIN, LEAVE, CRASH, DISCONNECT, RECONNECT = 1, 2, 3, 4, 5
        action_codes = {
            'join': JOIN,
            'leave': LEAVE,
            'crash': CRASH,
            'disconnect': DISCONNECT,
            'reconnect': RECONNECT,
        }

        # First collect the join/leave/crash/disconnect/reconnect events as
        # (time, amount, action code) tuples so they can be sorted afterwards.
        events = []
        has_joins = False

        for event in dynamic:
            if event.tag != 'schedule':
                print_and_fail("Only <schedule> is allowed inside <dynamic>")
            attrib = event.attrib
            # Entries lacking any of the three mandatory attributes, or
            # addressed to another service, are skipped here.
            if not ('name' in attrib and 'time' in attrib
                    and 'action' in attrib):
                continue
            if attrib['name'] != service:
                continue

            # parse time of event
            when = 0.0
            try:
                when = float(attrib['time'])
                if when < 0.0:
                    print_and_fail(
                        "time attribute must be a positive number")
            except ValueError:
                print_and_fail(
                    "time attribute must be a valid real number")

            # parse amount (defaults to a single replica)
            amount = 1
            if 'amount' in attrib:
                try:
                    amount = int(attrib['amount'])
                    if amount < 1:
                        print_and_fail(
                            "amount attribute must be an integer >= 1")
                except ValueError:
                    print_and_fail(
                        "amount attribute must be an integer >= 1")

            # parse action; actions not listed in the table are ignored
            code = action_codes.get(attrib['action'])
            if code is None:
                continue
            if code == JOIN:
                has_joins = True
            events.append((when, amount, code))

        if not has_joins:
            return hardcoded_count

        # list.sort is stable, so events sharing a timestamp keep file order.
        events.sort(key=lambda entry: entry[0])

        max_replicas = 0
        cummulative_replicas = 0
        disconnected = 0
        current_replicas = 0

        # Calculate required replicas (and perform semantic checking)
        for when, amount, code in events:
            if code == JOIN:
                current_replicas += amount
                cummulative_replicas += amount
            elif code == LEAVE or code == CRASH:
                current_replicas -= amount
            elif code == DISCONNECT:
                disconnected += amount
                if amount > current_replicas:
                    print_and_fail(
                        "Dynamic section for " + service +
                        " disconnects more replicas than are joined at second "
                        + str(when))
            elif code == RECONNECT:
                # Compare against the count *before* subtracting: checking
                # afterwards rejected valid schedules such as
                # "disconnect 2, then reconnect 2".
                if amount > disconnected:
                    print_and_fail(
                        "Dynamic section for " + service +
                        " reconnects more replicas than are disconnected at second "
                        + str(when))
                disconnected -= amount
            if current_replicas < 0:
                print_and_fail(
                    "Dynamic section for " + service +
                    " causes a negative number of replicas at second " +
                    str(when))
            if current_replicas > max_replicas:
                max_replicas = current_replicas

        if reuse:
            return max_replicas
        return cummulative_replicas
示例#18
0
    def parse_links(self, root):
        """Register every ``<link>`` element found under ``root`` in the graph.

        Each link must carry ``origin``, ``dest``, ``latency``, ``upload`` and
        ``network`` attributes; ``jitter`` and ``drop`` default to 0. A
        ``download`` attribute makes the link bidirectional: every forward
        link (using ``upload``) is followed by its reverse (using
        ``download``).

        When the first node of the origin and/or destination has
        ``shared_link`` set, a meta bridge from ``create_meta_bridge`` is
        inserted on that side. The latency/jitter/drop values are applied to
        one hop only; the hops attaching an endpoint to its meta bridge use
        0 latency, 0 jitter and 0.0 drop.
        """
        mandatory = ('origin', 'dest', 'latency', 'upload', 'network')

        def connect(attrs, first, second, latency, jitter, drop, two_way):
            # Forward hop uses the upload rate; the optional reverse hop uses
            # the download rate with the same latency/jitter/drop.
            self.graph.new_link(first, second, latency, jitter, drop,
                                attrs['upload'], attrs['network'])
            if two_way:
                self.graph.new_link(second, first, latency, jitter, drop,
                                    attrs['download'], attrs['network'])

        for link in root:
            if link.tag != 'link':
                print_and_fail('Invalid tag inside <links>: ' + link.tag)
            attrs = link.attrib
            if not all(key in attrs for key in mandatory):
                print_and_fail("Incomplete link description.")

            origin_nodes = self.graph.get_nodes(attrs['origin'])
            dest_nodes = self.graph.get_nodes(attrs['dest'])

            jitter = attrs.get('jitter', 0)
            drop = attrs.get('drop', 0)
            two_way = 'download' in attrs

            origin_shared = origin_nodes[0].shared_link
            if origin_shared and dest_nodes[0].shared_link:
                origin_bridge = self.create_meta_bridge()
                dest_bridge = self.create_meta_bridge()
                # bridge <-> bridge carries the real link properties
                connect(attrs, origin_bridge, dest_bridge, attrs['latency'],
                        jitter, drop, two_way)
                # origin <-> its bridge
                connect(attrs, attrs['origin'], origin_bridge, 0, 0, 0.0,
                        two_way)
                # destination's bridge <-> destination
                connect(attrs, dest_bridge, attrs['dest'], 0, 0, 0.0,
                        two_way)
            elif origin_shared:
                bridge = self.create_meta_bridge()
                # bridge <-> destination carries the real link properties
                connect(attrs, bridge, attrs['dest'], attrs['latency'],
                        jitter, drop, two_way)
                # origin <-> bridge
                connect(attrs, attrs['origin'], bridge, 0, 0, 0.0, two_way)
            elif dest_nodes[0].shared_link:
                bridge = self.create_meta_bridge()
                # origin <-> bridge carries the real link properties
                connect(attrs, attrs['origin'], bridge, attrs['latency'],
                        jitter, drop, two_way)
                # bridge <-> destination
                connect(attrs, bridge, attrs['dest'], 0, 0, 0.0, two_way)
            else:
                # Plain case: origin and destination are linked directly.
                connect(attrs, attrs['origin'], attrs['dest'],
                        attrs['latency'], jitter, drop, two_way)
示例#19
0
def main():
    """Parse a topology, pick a random non-supervisor root and run the emulation.

    Reads the topology file path from ``sys.argv[1]``, fills a ``NetGraph``
    from it, selects a random non-supervisor service instance as
    ``graph.root``, prints the computed shortest paths and finally hands
    control to ``EmulationManager.emulation_loop``.

    Failures (missing argument, no eligible root) are reported through
    ``print_and_fail``.
    """
    gc.set_debug(gc.DEBUG_STATS)
    setup_mocking()

    if len(sys.argv) < 2:
        print_and_fail("Usage: " + sys.argv[0] + " <input topology>")
    topology_file = sys.argv[1]

    graph = NetGraph()

    parser = XMLGraphParser(topology_file, graph)
    parser.fill_graph()
    print("Done parsing topology")

    # Hostname resolution (graph.resolve_hostnames()) is deliberately skipped.
    print("Skipping Resolving hostnames...")

    seed(None)
    print("Randomly Determining the root of the tree...")
    # Snapshot the non-empty host lists once; an empty list would make
    # randrange(0, 0) raise.
    service_hosts = [hosts for hosts in graph.services.values() if hosts]
    # Rejection sampling below never terminates if every host is a
    # supervisor, so establish up front that an eligible host exists.
    has_candidate = any(not host.supervisor
                        for hosts in service_hosts
                        for host in hosts)
    if not has_candidate:
        print_and_fail(
            "Failed to identify current service instance in topology!")

    while has_candidate:
        hosts = service_hosts[randrange(0, len(service_hosts))]
        candidate = hosts[randrange(0, len(hosts))]
        if not candidate.supervisor:
            graph.root = candidate
            break

    if graph.root is None:
        print_and_fail(
            "Failed to identify current service instance in topology!")

    print("Calculating shortest paths...")
    graph.calculate_shortest_paths()

    for node in graph.paths:
        path = graph.paths[node]
        print("##############################")
        print(graph.root.name + " -> " + node.name + ":" +
              str(node.__hash__()))
        print("latency: " + str(path.latency))
        print("drop: " + str(path.drop))
        print("bandwidth: " + str(path.max_bandwidth))
        print("------------------------------")
        for link in path.links:
            print("   " + link.source.name + " hop " + link.destination.name +
                  " i:" + str(link.index))

    print("Initializing network emulation conditions...")

    scheduler = parser.parse_schedule(graph.root, graph)

    manager = EmulationManager(graph, scheduler)
    manager.initialize()

    print("Starting experiment!")
    # Enter the emulation loop
    manager.emulation_loop()
示例#20
0
    def resolve_ips(self, number_of_gods):
        """Discover the other gods over UDP broadcast and record their IPs.

        Phase 1 starts ``self.broadcast_ips`` in a child process (passing it
        the sender socket and a random 128-bit number) and collects ``HELLO``
        messages into ``self.gods`` (sender IP as int -> message payload)
        until ``number_of_gods`` entries are known. Phase 2 starts
        ``self.broadcast_ready`` and waits until ``number_of_gods`` distinct
        senders have sent ``READY``. The entry whose payload equals our own
        random number identifies the local IP and is removed from
        ``self.gods``. The local and remote IPs are then appended to
        ``LOCAL_IPS_FILE`` and ``REMOTE_IPS_FILE``.

        Args:
            number_of_gods: Expected number of gods; must be > 0.

        Returns:
            ``self.gods`` with the local entry removed.

        Any exception is reported through ``print_and_fail``. Broadcast
        processes and sockets are always released, including on failure.
        """
        recv_sock = None
        sender_sock = None
        broadcasts = []

        try:
            own_ip = "(not yet known)"
            own_ip_int = ip2int("127.0.0.1")

            if number_of_gods > 0:
                print_named(
                    "god", "ip: " + str(own_ip) + ", nr. of gods: " +
                    str(number_of_gods))
            else:
                print_and_fail('there are no nodes on this "cluster".')

            # listen for msgs from other gods
            recv_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            recv_sock.bind(('', GOD_IPS_SHARE_PORT))

            # setup broadcast
            sender_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            sender_sock.bind(('', GOD_IPS_SHARE_PORT + 1))
            sender_sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            # NOTE(review): SO_REUSEADDR is set after bind(); address-reuse
            # options normally have to be set before binding to take effect.
            # Order kept as-is; confirm whether this is intended.
            sender_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sender_sock.setblocking(False)

            # broadcast local IPs
            random_number = random.getrandbits(128)
            ip_broadcast = Process(target=self.broadcast_ips,
                                   args=(
                                       sender_sock,
                                       random_number,
                                   ))
            ip_broadcast.start()
            broadcasts.append(ip_broadcast)

            while len(self.gods) < number_of_gods:
                data, addr = recv_sock.recvfrom(self.BUFFER_LEN)
                # Undecodable bytes are replaced rather than raising, so a
                # stray datagram cannot abort discovery.
                msg = data.decode("utf-8", errors="replace").split()
                if not msg:
                    continue

                print_named("god1", f"{addr[0]} :: {msg}")
                ip_as_int = ip2int(addr[0])

                if msg[0] == "READY" and ip_as_int not in self.ready_gods:
                    self.ready_gods.append(ip_as_int)

                elif (msg[0] == "HELLO" and len(msg) > 1
                      and ip_as_int not in self.gods):
                    self.gods[ip_as_int] = msg[1]

            # broadcast ready msgs
            ready_broadcast = Process(target=self.broadcast_ready,
                                      args=(sender_sock, ))
            ready_broadcast.start()
            broadcasts.append(ready_broadcast)

            while len(self.ready_gods) < number_of_gods:
                data, addr = recv_sock.recvfrom(self.BUFFER_LEN)
                msg = data.decode("utf-8", errors="replace").split()
                if not msg:
                    continue

                print_named("god2", f"{addr[0]} :: {msg[0]}")
                ip_as_int = ip2int(addr[0])

                if msg[0] == "READY" and ip_as_int not in self.ready_gods:
                    self.ready_gods.append(ip_as_int)

            # terminate all broadcasts
            for process in broadcasts:
                process.terminate()
            for process in broadcasts:
                process.join()

            # find our own IP by matching our random_number
            # and delete ourselves from the list of other gods
            # (iterate over a snapshot because the dict is modified)
            for key, value in list(self.gods.items()):
                if str(random_number) == value:
                    own_ip_int = key
                    own_ip = int2ip(own_ip_int)
                    del self.gods[own_ip_int]
                    break

            print_named(
                "god",
                "ip: " + own_ip + ", nr. of gods: " + str(number_of_gods))

            # write all known IPs to a file to be read from c++ lib if necessary
            with open(LOCAL_IPS_FILE, 'a') as locals_file:
                locals_file.write(str(own_ip_int))

            with open(REMOTE_IPS_FILE, 'a') as remotes_file:
                for god in self.gods:
                    remotes_file.write(str(god) + "\n")

            known_ips = ""
            with open(LOCAL_IPS_FILE, 'r') as file:
                known_ips += "local IP: "
                for line in file.readlines():
                    known_ips += int2ip(int(line.strip())) + ", "

            known_ips += "\n           "
            with open(REMOTE_IPS_FILE, 'r') as file:
                known_ips += "remote IPs: "
                for line in file.readlines():
                    known_ips += int2ip(int(line.strip())) + ", "

            print_named("god", known_ips)

            return self.gods

        except Exception as e:
            print_and_fail(e)

        finally:
            # Stop any broadcaster that is still running (only the case on a
            # failure path); a live child process could otherwise outlast
            # this call and keep the failing process from exiting.
            for process in broadcasts:
                if process.is_alive():
                    process.terminate()
                    process.join()
            for sock in (recv_sock, sender_sock):
                if sock is not None:
                    sock.close()
示例#21
0
def main():
    """Generate a deployment file for the topology named on the command line.

    Command line: ``<input topology> <orchestrator> [-d]``. ``-k`` as the
    orchestrator selects ``KubernetesManifestGenerator``; any other value
    selects ``DockerComposeFileGenerator``. ``-d`` as the third argument
    turns bandwidth emulation off.

    The topology is parsed into a ``NetGraph`` and checked against the link
    and path-length limits before the generator runs. A short summary
    (link/host counts, experiment UUID) is printed to stderr.
    """
    if len(sys.argv) not in (3, 4):
        # Each usage line ends with its own newline; the second line used to
        # run straight into the third.
        msg = "Usage: deploymentGenerator.py <input topology> <orchestrator> > <output compose file>\n" \
             + "    <orchestrator> can be -s for Docker Swarm or -k for Kubernetes\n" \
             + "    optionally use -d to deactivate bandwidth emulation at runtime."

        print_and_fail(msg)

    shm_size = 8000000000
    aeron_lib_path = "/home/daedalus/Documents/aeron4need/cppbuild/Release/lib/libaeronlib.so"
    aeron_term_buffer_length = 64 * 1024 * 1024  # must be multiple of 64*1024
    aeron_ipc_term_buffer_length = 64 * 1024 * 1024  # must be multiple of 64*1024

    threading_mode = 'SHARED'  # aeron uses 1 thread
    # threading_mode = 'SHARED_NETWORK'   # aeron uses 2 threads
    # threading_mode = 'DEDICATED'        # aeron uses 3 threads

    pool_period = 0.05
    max_flow_age = 2

    # TODO use argparse to check for flags and arguments properly

    topology_file = sys.argv[1]

    # Anything other than "-k" falls back to Docker Swarm.
    orchestrator = "kubernetes" if sys.argv[2] == "-k" else "swarm"

    bw_emulation = not (len(sys.argv) > 3 and sys.argv[3] == "-d")

    graph = NetGraph()

    XMLGraphParser(topology_file, graph).fill_graph()
    output = "Graph has " + str(len(graph.links)) + " links.\n"

    # Count every host instance across all services.
    service_count = sum(1 for name in graph.services
                        for _ in graph.services[name])

    output += "      has " + str(service_count) + " hosts.\n"

    if len(graph.links) > SHORT_LIMIT:
        print_and_fail("Topology has too many links: " + str(len(graph.links)))

    # NOTE(review): no shortest-path computation is visible before this
    # point, so graph.paths may be empty here; also confirm that iterating
    # graph.paths yields path objects rather than keys.
    for path in graph.paths:
        if len(path.links) > 249:
            msg = "Path from " + path.links[0].source.name + " to " \
                  + path.links[-1].destination.name + " is too long (over 249 hops)"
            print_and_fail(msg)

    generator = None
    if orchestrator == "kubernetes":
        generator = KubernetesManifestGenerator(
            os.getcwd() + "/" + topology_file, graph)

    elif orchestrator == 'swarm':
        generator = DockerComposeFileGenerator(topology_file, graph)

    # insert here any other generators required by new orchestrators

    if generator is not None:
        generator.generate(pool_period, max_flow_age, threading_mode, shm_size,
                           aeron_lib_path, aeron_term_buffer_length,
                           aeron_ipc_term_buffer_length, bw_emulation)
        output += "Experiment UUID: " + generator.experiment_UUID
        print(output, file=sys.stderr)

    else:
        print("Failed to find a suitable generator.", file=sys.stderr)