def stop_arena(unit_id):
    """
    Arenas have server builds for the unit as well as individual workouts. This function
    stops all of these servers and marks the unit and each of its workouts as not running
    in the Datastore.
    :param unit_id: The build ID of the arena
    :return: None
    """
    # First stop the unit's servers. Instances are matched by name prefix.
    result = compute.instances().list(
        project=project, zone=zone,
        filter='name = {}*'.format(unit_id)).execute()
    unit = ds_client.get(ds_client.key('cybergym-unit', unit_id))
    unit['arena']['running'] = False
    ds_client.put(unit)
    g_logger = log_client.logger('arena-actions')
    if 'items' in result:
        for vm_instance in result['items']:
            compute.instances().stop(
                project=project, zone=zone,
                instance=vm_instance["name"]).execute()
        # The severity must be a specific level, not the LOG_LEVELS container itself.
        g_logger.log_struct(
            {"message": "Stopped servers for arena {}".format(unit_id)},
            severity=LOG_LEVELS.INFO)
    else:
        g_logger.log_struct(
            {"message": "No servers in arena {} to stop".format(unit_id)},
            severity=LOG_LEVELS.WARNING)

    # Then stop the servers of every student workout, logging to a per-workout logger.
    for workout_id in unit['workouts']:
        g_logger = log_client.logger(str(workout_id))
        result = compute.instances().list(
            project=project, zone=zone,
            filter='name = {}*'.format(workout_id)).execute()
        workout = ds_client.get(ds_client.key('cybergym-workout', workout_id))
        workout['running'] = False
        ds_client.put(workout)
        if 'items' in result:
            for vm_instance in result['items']:
                compute.instances().stop(
                    project=project, zone=zone,
                    instance=vm_instance["name"]).execute()
            g_logger.log_struct(
                {
                    "message":
                    "Workout servers stopped for workout {}".format(workout_id)
                },
                severity=LOG_LEVELS.INFO)
        else:
            g_logger.log_struct(
                {
                    "message":
                    "No servers to stop for workout {}".format(workout_id)
                },
                severity=LOG_LEVELS.WARNING)
Exemplo n.º 2
0
def check_build_state_change(build_id, check_server_state, change_build_state):
    """
    Transition a build to a new state once all of its servers share a given state.

    :param build_id: ID of the workout or unit whose servers are inspected
    :param check_server_state: The state every server of the build must be in
    :param change_build_state: The state to move the build to when the check passes
    :return: None
    """
    server_query = ds_client.query(kind='cybergym-server')
    server_query.add_filter("workout", "=", build_id)
    servers = list(server_query.fetch())

    # Bail out as soon as a single server is found in a different state.
    if any(entry['state'] != check_server_state for entry in servers):
        return

    # Every server is in the desired state, so the build as a whole can change state.
    # The build ID may refer to a workout or, failing that, a unit.
    build = (ds_client.get(ds_client.key('cybergym-workout', build_id))
             or ds_client.get(ds_client.key('cybergym-unit', build_id)))
    state_transition(build, change_build_state)
Exemplo n.º 3
0
def start_arena(unit_id):
    """
    Start an arena: mark the unit as running, request a start for each of the unit's
    own servers through the server management topic, and then start every student workout.

    :param unit_id: The build ID of the arena
    :return: None
    """
    arena_log = log_client.logger('arena-actions')
    arena_log.log_struct(
        {"message": "Starting arena {}".format(unit_id)},
        severity=LOG_LEVELS.INFO)

    unit_key = ds_client.key('cybergym-unit', unit_id)
    unit = ds_client.get(unit_key)
    state_transition(entity=unit, new_state=BUILD_STATES.STARTING)
    arena_record = unit['arena']
    arena_record['running'] = True
    # Record the start time as seconds since the epoch (UTC), stored as a string.
    arena_record['gm_start_time'] = str(calendar.timegm(time.gmtime()))
    ds_client.put(unit)

    # Start the central servers
    arena_log.log_struct(
        {"message": "Starting central servers for arena {}".format(unit_id)},
        severity=LOG_LEVELS.INFO)
    central_query = ds_client.query(kind='cybergym-server')
    central_query.add_filter("workout", "=", unit_id)
    central_servers = list(central_query.fetch())
    for central_server in central_servers:
        # Publish a start request to the server management topic
        topic_name = PUBSUB_TOPICS.MANAGE_SERVER
        client = pubsub_v1.PublisherClient()
        publish_future = client.publish(
            client.topic_path(project, topic_name),
            data=b'Server Build',
            server_name=central_server['name'],
            action=SERVER_ACTIONS.START)
        print(publish_future.result())

    # Now start all of the student workouts for this arena
    for student_workout_id in unit['workouts']:
        start_vm(student_workout_id)
Exemplo n.º 4
0
    def restore_server(server_name):
        """
        1) Delete the server, 2) Set the image name to be one previously snapshotted, 3) Build the server, and then
        4) restore the build configuration to the existing image in case it's later nuked.

        The first snapshot whose name starts with the server name is used as the boot disk source.
        @param server_name: Name of the server.
        @type server_name: String
        @return: None
        @raise KeyError: If the snapshot listing contains no 'items' (no matching snapshot).
        """
        server_delete(server_name)

        # The entity is fetched after the delete so it carries whatever server_delete stored.
        server = ds_client.get(ds_client.key('cybergym-server', server_name))
        snapshots = compute.snapshots().list(
            project=project, filter=f"name = {server.key.name}*").execute()
        snapshot_name = snapshots['items'][0]['name']
        # Reference the snapshot in the same project it was listed from rather than a hard-coded one.
        source_snapshot = f"projects/{project}/global/snapshots/{snapshot_name}"
        snapshot_disks = [{
            'boot': True,
            'autoDelete': True,
            'initializeParams': {
                'sourceSnapshot': source_snapshot,
            }
        }]
        old_config_disks = server['config']['disks']
        server['config']['disks'] = snapshot_disks
        ds_client.put(server)
        try:
            server_build(server_name)
        finally:
            # Now restore the old image config in case the user might later need to nuke the workout.
            # This runs even when the build fails so the stored spec never stays pointed at the snapshot.
            server['config']['disks'] = old_config_disks
            ds_client.put(server)
Exemplo n.º 5
0
def workout_route_setup(workout_id):
    """
    Create the network routes listed in a workout's specification.

    For each route, the function polls (10 seconds apart, up to 50 waits) until the
    next-hop instance exists before creating the route.

    :param workout_id: The ID of the workout in the Datastore
    :return: False when waiting for a next-hop instance times out; otherwise None
    """
    workout = ds_client.get(ds_client.key('cybergym-workout', workout_id))
    g_logger = log_client.logger(workout_id)

    # Nothing to do when the workout has no routes defined.
    if not ('routes' in workout and workout['routes']):
        return

    for route in workout['routes']:
        waits = 0
        while True:
            if test_server_existence(workout_id, route['next_hop_instance']):
                break
            if waits >= 50:
                break
            time.sleep(10)
            waits += 1

        if waits >= 50:
            g_logger.log_text(
                f"Timeout waiting to add routes for {route['next_hop_instance']}"
            )
            return False

        # Resource names are prefixed with the workout ID.
        route_name = "%s-%s" % (workout_id, route['name'])
        route_network = "%s-%s" % (workout_id, route['network'])
        destination = route['dest_range']
        next_hop = "%s-%s" % (workout_id, route['next_hop_instance'])
        create_route({
            "name": route_name,
            "network": route_network,
            "destRange": destination,
            "nextHopInstance": next_hop,
        })
Exemplo n.º 6
0
 def _process_workout_deletion(self):
     """
     Finalize the deletion of the build identified by self.build_id.

     Since workouts are deleted asynchronously, this function is called when the last step of workout deletion
     occurs. For an arena, the unit is transitioned to DELETED only when every server of every student workout
     and the first server registered directly against the unit are in the DELETED state. For a single workout,
     the workout is transitioned to DELETED directly. Builds flagged as 'misfit' are then removed from the
     Datastore.
     @return: None
     @rtype: None
     """
     if self.build_type == WORKOUT_TYPES.ARENA:
         unit = ds_client.get(ds_client.key('cybergym-unit', self.build_id))
         all_workouts_deleted = True
         if unit:
             is_misfit = unit['arena'].get('misfit', None)
             # validate deletion state for student arena servers
             for workout_id in unit['workouts']:
                 query_workout_servers = ds_client.query(
                     kind='cybergym-server')
                 query_workout_servers.add_filter("workout", "=",
                                                  workout_id)
                 for server in query_workout_servers.fetch():
                     if server['state'] != BUILD_STATES.DELETED:
                         all_workouts_deleted = False
             # validate deletion state for student entry server
             query_unit_server = ds_client.query(kind='cybergym-server')
             query_unit_server.add_filter("workout", "=", self.build_id)
             unit_server = list(query_unit_server.fetch())
             # A unit without any server of its own has nothing left to delete, so only
             # inspect the first server when the query actually returned one.
             if unit_server and unit_server[0]['state'] != BUILD_STATES.DELETED:
                 all_workouts_deleted = False
             # if all machines have DELETED state, update arena state to DELETED
             if all_workouts_deleted:
                 state_transition(unit, BUILD_STATES.DELETED)
                 if is_misfit:
                     # Delete by key, matching the workout branch below.
                     ds_client.delete(unit.key)
     elif self.build_type == WORKOUT_TYPES.WORKOUT:
         workout = ds_client.get(
             ds_client.key('cybergym-workout', self.build_id))
         if workout:
             state_transition(workout, BUILD_STATES.DELETED)
             is_misfit = workout.get('misfit', None)
             if is_misfit:
                 ds_client.delete(workout.key)
Exemplo n.º 7
0
def register_student_entry(build_id, server_name):
    """
    Register the DNS record for a build's student entry server and set the build state
    according to whether the guacamole connection test succeeds.

    :param build_id: ID of the workout or unit the server belongs to
    :param server_name: Name of the student entry server
    :return: The external IP address obtained for the server
    """
    # The build ID may refer to a workout or, failing that, a unit.
    build = (ds_client.get(ds_client.key('cybergym-workout', build_id))
             or ds_client.get(ds_client.key('cybergym-unit', build_id)))

    # Add the external IP address for the workout. This allows easy deletion of the DNS record later.
    ip_address = get_server_ext_address(server_name)
    add_dns_record(build_id, ip_address)

    # Make sure the guacamole server actually comes up before marking the build as ready
    print(f"DNS record set for {server_name}. Now Testing guacamole connection. This may take a few minutes.")
    if not test_guacamole(ip_address):
        state_transition(entity=build, new_state=BUILD_STATES.GUACAMOLE_SERVER_LOAD_TIMEOUT)
        return ip_address

    print(f"Setting the build {build_id} to ready")
    state_transition(entity=build, new_state=BUILD_STATES.READY)
    # The caller uses the returned address to set the server datastore element
    return ip_address
def rebuild_workout(workout_id):
    """
    Reset a workout to the START state and run a full build of it.

    :param workout_id: The workout_id to build
    :return: None
    """
    workout_key = ds_client.key('cybergym-workout', workout_id)
    state_transition(entity=ds_client.get(workout_key),
                     new_state=BUILD_STATES.START)
    build_workout(workout_id)
Exemplo n.º 9
0
def server_rebuild(server_name):
    """
    Rebuild a server: stop it when its stored state is 'RUNNING', delete it, and build it again.

    :param server_name: The Datastore entity name of the server
    :return: None
    """
    server_key = ds_client.key('cybergym-server', server_name)
    server = ds_client.get(server_key)
    log_client.logger(str(server_name)).log_text(f"Rebuilding server {server_name}")

    # Only a server recorded as running needs to be stopped first.
    is_running = 'state' in server and server['state'] == 'RUNNING'
    if is_running:
        server_stop(server_name)

    server_delete(server_name)
    server_build(server_name)
Exemplo n.º 10
0
def stop_arena(unit_id):
    """
    Stop every server of an arena: first the servers built for the unit itself, then the
    servers of each student workout. The unit and workouts are flagged as not running.

    :param unit_id: The build ID of the arena
    :return: None
    """
    instances_api = compute.instances()

    # Unit-level servers are matched by name prefix
    unit_listing = instances_api.list(
        project=project, zone=zone,
        filter='name = {}*'.format(unit_id)).execute()
    unit = ds_client.get(ds_client.key('cybergym-unit', unit_id))
    unit['arena']['running'] = False
    ds_client.put(unit)
    if 'items' not in unit_listing:
        print("No unit servers to stop")
    else:
        for instance in unit_listing['items']:
            compute.instances().stop(
                project=project, zone=zone,
                instance=instance["name"]).execute()
        print("Unit servers stopped")

    # Repeat the same steps for each student workout
    for workout_id in unit['workouts']:
        workout_listing = compute.instances().list(
            project=project, zone=zone,
            filter='name = {}*'.format(workout_id)).execute()
        workout = ds_client.get(ds_client.key('cybergym-workout', workout_id))
        workout['running'] = False
        ds_client.put(workout)
        if 'items' not in workout_listing:
            print("No workout servers to stop for %s" % workout_id)
            continue
        for instance in workout_listing['items']:
            compute.instances().stop(
                project=project, zone=zone,
                instance=instance["name"]).execute()
        print("Workout servers stopped for %s" % workout_id)
Exemplo n.º 11
0
 def _delete_specific_workout(self):
     """
     Delete all resources in a specific workout. This is called as a cloud function.

     Resources are removed in order: DNS record, servers, firewall rules, routes, subnetworks and
     finally the network. The workout is transitioned to DELETED only when every step succeeds, and
     a workout flagged as 'misfit' is then removed from the Datastore.
     @return: True when the workout was fully deleted; False when the workout does not exist, is
              already DELETED, or a deletion step failed
     @rtype: bool
     """
     workout = ds_client.get(
         ds_client.key('cybergym-workout', self.build_id))
     if not workout:
         return False
     cloud_log(self.build_id, f"Deleting workout {self.build_id}",
               LOG_LEVELS.INFO)
     # A missing DNS record or external IP is not fatal; log it and carry on.
     try:
         delete_dns(self.build_id, workout["external_ip"])
     except HttpError:
         cloud_log(
             self.build_id,
             f"DNS record does not exist for workout {self.build_id}",
             LOG_LEVELS.WARNING)
     except KeyError:
         cloud_log(self.build_id,
                   f"Workout {self.build_id} has no external IP address",
                   LOG_LEVELS.WARNING)
     if 'state' in workout:
         if workout['state'] in [BUILD_STATES.READY, BUILD_STATES.RUNNING]:
             state_transition(workout, BUILD_STATES.DELETING_SERVERS)
         elif workout['state'] == BUILD_STATES.DELETED:
             return False
     self._delete_vms()

     # Each step only runs when the previous one succeeded. The results of the firewall and
     # subnetwork waits are not checked.
     resources_deleted = False
     if (self._wait_for_deletion(wait_type=ArenaWorkoutDeleteType.SERVER)
             and self._delete_firewall_rules()):
         self._wait_for_deletion(
             wait_type=ArenaWorkoutDeleteType.FIREWALL_RULES)
         if self._delete_routes() and self._delete_subnetworks():
             self._wait_for_deletion(
                 wait_type=ArenaWorkoutDeleteType.SUBNETWORK)
             resources_deleted = bool(self._delete_network())

     if not resources_deleted:
         cloud_log(self.build_id,
                   f"There was a problem deleting workout {self.build_id}",
                   LOG_LEVELS.ERROR)
         return False

     state_transition(workout, BUILD_STATES.DELETED)
     cloud_log(self.build_id,
               f"Finished deleting workout {self.build_id}",
               LOG_LEVELS.INFO)
     # 'misfit' is optional; a workout without the flag must not fail after a successful delete.
     if workout.get('misfit', None):
         ds_client.delete(workout.key)
     return True
Exemplo n.º 12
0
def server_delete(server_name):
    """
    Delete a compute instance and record the outcome on its Datastore entity.

    An HttpError from the delete request is treated as the server already being gone. Otherwise
    the function waits for the delete operation, retrying the wait up to five times on timeout.

    :param server_name: The Datastore entity name of the server to delete
    :return: True when the server is marked DELETED, False when waiting for the operation timed out
    """
    server = ds_client.get(ds_client.key('cybergym-server', server_name))

    def _finish_deletion():
        # Mark the server as deleted and let the build change state once all its servers are deleted.
        state_transition(entity=server, new_state=SERVER_STATES.DELETED)
        print(f"Finished deleting {server_name}")
        check_build_state_change(build_id=server['workout'],
                                 check_server_state=SERVER_STATES.DELETED,
                                 change_build_state=BUILD_STATES.COMPLETED_DELETING_SERVERS)
        return True

    state_transition(entity=server, new_state=SERVER_STATES.DELETING)
    workout_globals.refresh_api()
    try:
        response = compute.instances().delete(project=project, zone=zone, instance=server_name).execute()
    except HttpError:
        # If the server is already deleted or no longer exists, just finalize the state.
        return _finish_deletion()

    print(f'Sent delete request to {server_name}, and waiting for response')
    for _ in range(5):
        try:
            print(f"Begin waiting for delete response from operation {response['id']}")
            compute.zoneOperations().wait(project=project, zone=zone, operation=response["id"]).execute()
        except timeout:
            print('Response timeout for deleting server. Trying again')
        else:
            break
    else:
        # Every wait attempt timed out
        print(f'Timeout in trying to delete server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    # If this is a student entry server, delete the DNS
    if 'student_entry' in server and server['student_entry']:
        print(f'Deleting DNS record for {server_name}')
        delete_dns(server['workout'], server['external_ip'])

    return _finish_deletion()

# server_start('hxckdwxwld-nested')
# server_delete('oztfvquhhi-cybergym-publicprivate')
def create_new_workout_in_unit(unit_id,
                               student_name,
                               email_address=None,
                               registration_required=False):
    """
    Use this script to add a new workout for a new registered user for a preexisting unit.
    The unit's first workout is used as the template for the new one.
    @param unit_id: The unit_id to add the workout to
    @type unit_id: String
    @param student_name: Name of student to add
    @type student_name: String
    @param email_address: Email address of the student to add
    @type email_address: String
    @param registration_required: Whether the new workout requires registration
    @type registration_required: Boolean
    @return: None
    @rtype: None
    """
    unit = ds_client.get(ds_client.key('cybergym-unit', unit_id))
    template_key = ds_client.key('cybergym-workout', unit['workouts'][0])

    # Re-key the template entity under a fresh random 10-letter ID
    new_workout = ds_client.get(template_key)
    new_id = ''.join(
        random.choice(string.ascii_lowercase) for _ in range(10))
    new_workout.key = ds_client.key('cybergym-workout', new_id)

    new_workout['state'] = BUILD_STATES.START
    new_workout['student_email'] = email_address
    student_record = new_workout['student_name']
    student_record['student_name'] = student_name
    student_record['student_email'] = email_address
    new_workout['registration_required'] = registration_required

    unit['workouts'].append(new_workout.key.name)

    ds_client.put(unit)
    ds_client.put(new_workout)
    if not registration_required:
        print(f"New workout ID is {new_id}")
    else:
        print(f"New registered workout created for {email_address}")
def add_child_project(child_project):
    """
    Add a child project to this parent project. This function gets called by the main application when
    a new child project has been provisioned.
    @param child_project: Name of the child project
    @type child_project: String
    @return: True
    """
    admin_key = ds_client.key(AdminInfoEntity.KIND, 'cybergym')
    admin_info = ds_client.get(admin_key)
    # Create the list of child projects on first use, then append to it.
    children = admin_info.setdefault(
        AdminInfoEntity.Entities.CHILD_PROJECTS, [])
    children.append(child_project)
    ds_client.put(admin_info)
    return True
Exemplo n.º 15
0
def start_vm(workout_id):
    """
    Start a workout: move it to the STARTING state, record its start time, and publish a start
    request for each of its servers to the server management topic.

    :param workout_id: The ID of the workout in the Datastore
    :return: None
    """
    print("Starting workout %s" % workout_id)
    workout_key = ds_client.key('cybergym-workout', workout_id)
    workout = ds_client.get(workout_key)
    state_transition(entity=workout, new_state=BUILD_STATES.STARTING)
    # Start time is stored as seconds since the epoch (UTC), as a string
    workout['start_time'] = str(calendar.timegm(time.gmtime()))
    ds_client.put(workout)

    server_query = ds_client.query(kind='cybergym-server')
    server_query.add_filter("workout", "=", workout_id)
    workout_servers = list(server_query.fetch())
    for workout_server in workout_servers:
        # Publish to a server management topic
        topic_name = PUBSUB_TOPICS.MANAGE_SERVER
        client = pubsub_v1.PublisherClient()
        publish_future = client.publish(
            client.topic_path(project, topic_name),
            data=b'Server Build',
            server_name=workout_server['name'],
            action=SERVER_ACTIONS.START)
        print(publish_future.result())
def nuke_workout(workout_id):
    """
    Delete all resources of a workout and then build it again.

    :param workout_id: The ID of the workout specification in the Datastore
    :returns: None
    """
    nuke_log = log_client.logger(str(workout_id))
    workout = ds_client.get(ds_client.key('cybergym-workout', workout_id))

    # Phase 1: delete the existing workout resources
    nuke_log.log_text("Nuke Operation: Delete")
    state_transition(entity=workout, new_state=BUILD_STATES.NUKING)
    deletion = DeletionManager(
        deletion_type=DeletionManager.DeletionType.SPECIFIC,
        build_id=workout_id,
        build_type=WORKOUT_TYPES.WORKOUT)
    deletion.run()
    # Pause for a minute before rebuilding
    time.sleep(60)

    # Phase 2: rebuild from the START state
    nuke_log.log_text("Nuke Operation: Rebuild")
    state_transition(entity=workout, new_state=BUILD_STATES.START)
    build_workout(workout_id)
Exemplo n.º 17
0
def stop_workout(workout_id):
    """
    Stop every compute instance whose name starts with the workout ID and move the workout
    from RUNNING back to READY.

    :param workout_id: The ID of the workout in the Datastore
    :return: None
    """
    listing = compute.instances().list(
        project=project, zone=zone,
        filter='name = {}*'.format(workout_id)).execute()

    workout_key = ds_client.key('cybergym-workout', workout_id)
    workout = ds_client.get(workout_key)
    state_transition(entity=workout,
                     new_state=BUILD_STATES.READY,
                     existing_state=BUILD_STATES.RUNNING)
    ds_client.put(workout)

    if 'items' not in listing:
        print("No workouts to stop")
        return

    for instance in listing['items']:
        compute.instances().stop(
            project=project, zone=zone,
            instance=instance["name"]).execute()
    print("Workouts stopped")
def stop_workout(workout_id):
    """
    Stop a workout: move it from RUNNING to READY, add the elapsed run time to its
    'runtime_counter', publish a stop request for each of its servers, and stop every
    compute instance whose name starts with the workout ID.

    :param workout_id: The ID of the workout in the Datastore
    :return: None
    """
    listing = compute.instances().list(
        project=project, zone=zone,
        filter='name = {}*'.format(workout_id)).execute()
    workout = ds_client.get(ds_client.key('cybergym-workout', workout_id))
    state_transition(entity=workout,
                     new_state=BUILD_STATES.READY,
                     existing_state=BUILD_STATES.RUNNING)

    # Accumulate the run time (in seconds) when a start time was recorded
    if 'start_time' in workout:
        started_at = workout['start_time']
        stopped_at = calendar.timegm(time.gmtime())
        elapsed = int(stopped_at) - int(started_at)
        if 'runtime_counter' in workout:
            previous_total = int(workout['runtime_counter'])
        else:
            previous_total = 0
        workout['runtime_counter'] = previous_total + elapsed
    ds_client.put(workout)

    server_query = ds_client.query(kind='cybergym-server')
    server_query.add_filter("workout", "=", workout_id)
    workout_servers = list(server_query.fetch())
    for workout_server in workout_servers:
        # Publish to a server management topic
        topic_name = PUBSUB_TOPICS.MANAGE_SERVER
        client = pubsub_v1.PublisherClient()
        publish_future = client.publish(
            client.topic_path(project, topic_name),
            data=b'Server Build',
            server_name=workout_server['name'],
            action=SERVER_ACTIONS.STOP)
        print(publish_future.result())

    workout_log = log_client.logger(str(workout_id))
    if 'items' not in listing:
        workout_log.log_struct({"message": "No workouts to stop"},
                               severity=LOG_LEVELS.WARNING)
        return

    for instance in listing['items']:
        compute.instances().stop(
            project=project, zone=zone,
            instance=instance["name"]).execute()
    workout_log.log_struct({"message": "Workout stopped"},
                           severity=LOG_LEVELS.INFO)
Exemplo n.º 19
0
def server_stop(server_name):
    """
    Send a stop request for a server, retrying up to five times when the request fails with a
    BrokenPipeError. The server entity is moved to the STOPPING state before the first attempt.

    :param server_name: The Datastore entity name of the server to stop
    :return: True once the stop request has been sent, False when all five attempts failed
    """
    server = ds_client.get(ds_client.key('cybergym-server', server_name))
    g_logger = log_client.logger(str(server_name))
    state_transition(entity=server, new_state=SERVER_STATES.STOPPING)

    for _ in range(5):
        # Refresh the API before every attempt, including retries
        workout_globals.refresh_api()
        try:
            compute.instances().stop(
                project=project, zone=zone, instance=server_name).execute()
            # This routine stops the server, so the log message must say so
            g_logger.log_text(
                f'Sent job to stop {server_name}, and waiting for response')
            return True
        except BrokenPipeError:
            continue

    return False
Exemplo n.º 20
0
def server_start(server_name):
    """
    Starts a server based on the specification in the Datastore entity with name server_name. A guacamole server
    is also registered with DNS.
    :param server_name: The Datastore entity name of the server to start
    :return: A boolean status on the success of the start
    """
    server_key = ds_client.key('cybergym-server', server_name)
    server = ds_client.get(server_key)
    state_transition(entity=server, new_state=SERVER_STATES.STARTING)
    workout_globals.refresh_api()
    response = compute.instances().start(project=project, zone=zone, instance=server_name).execute()
    print(f'Sent start request to {server_name}, and waiting for response')

    # Wait for the start operation, retrying the wait up to five times on timeout
    for _ in range(5):
        try:
            print(f"Begin waiting for start response from operation {response['id']}")
            compute.zoneOperations().wait(project=project, zone=zone, operation=response["id"]).execute()
        except timeout:
            print('Response timeout for starting server. Trying again')
        else:
            break
    else:
        # Every wait attempt timed out
        print(f'Timeout in trying to start server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    # If this is the guacamole server for student entry, then register the new DNS
    is_student_entry = 'student_entry' in server and server['student_entry']
    if is_student_entry:
        print(f'Setting DNS record for {server_name}')
        server['external_ip'] = register_student_entry(server['workout'], server_name)

    state_transition(entity=server, new_state=SERVER_STATES.RUNNING)
    print(f"Finished starting {server_name}")

    # If all servers have started, then change the build state
    check_build_state_change(build_id=server['workout'],
                             check_server_state=SERVER_STATES.RUNNING,
                             change_build_state=BUILD_STATES.RUNNING)
    return True
Exemplo n.º 21
0
def fix_student_entry_in_workout(workout_id):
    """
    Delete a workout's student entry (guacamole) server and send a job to build it again
    from the workout's 'student_entry' specification.

    :param workout_id: The ID of the workout in the Datastore
    :return: None
    """
    workout = ds_client.get(ds_client.key('cybergym-workout', workout_id))
    print("Starting to delete student entry")
    server_delete(f"{workout_id}-student-guacamole")
    print("Finished deleting student entry")

    entry_spec = workout['student_entry']
    has_connection_list = 'connections' in entry_spec
    network_name = f"{workout_id}-{workout['student_entry']['network']}"
    if has_connection_list:
        # One guacamole connection per listed entry
        guac_connection = [
            create_guac_connection(workout_id, entry)
            for entry in entry_spec['connections']
        ]
    else:
        # The student entry spec itself describes the single connection
        guac_connection = [create_guac_connection(workout_id, entry_spec)]

    print("Starting to build student entry")
    build_guacamole_server(build=workout,
                           network=network_name,
                           guacamole_connections=guac_connection)
    print("Finished sending job to build student entry")
Exemplo n.º 22
0
def build_arena(unit_id):
    """
    Builds an arena of student servers and a common compute environment according to the specification referenced in
    the workout_unit datastore.

    The build runs in ordered stages (student servers, arena networks, arena servers, routes, firewall rules). Each
    stage after the student servers is only run when check_ordered_arenas_state allows it for the unit, and the unit
    is finally transitioned to READY.
    :param unit_id: The unit ID key in the datastore holding the build specification
    :return: None
    """
    key = ds_client.key('cybergym-unit', unit_id)
    unit = ds_client.get(key)
    # This can sometimes happen when debugging a Unit ID and the Datastore record no longer exists.
    # The unit itself must be checked before reading its arena specification.
    arena = unit.get('arena') if unit else None
    if not arena:
        print('No unit %s exists in the data store' % unit_id)
        return

    if 'state' not in unit or not unit['state']:
        state_transition(entity=unit, new_state=BUILD_STATES.START)

    # First create the student servers
    print('Creating student servers')
    build_student_servers(unit_id=unit_id, workouts=unit['workouts'],
                          student_entry_type=arena['student_entry_type'],
                          student_entry_server=arena['student_entry'],
                          student_entry_username=arena['student_entry_username'],
                          student_entry_password=arena['student_entry_password'],
                          network_type=arena['student_network_type'])

    # STATE: BUILDING_ARENA_NETWORKS
    if check_ordered_arenas_state(unit, BUILD_STATES.BUILDING_ARENA_NETWORKS):
        state_transition(entity=unit, new_state=BUILD_STATES.BUILDING_ARENA_NETWORKS)
        if arena['networks']:
            print('Creating additional arena networks')
            create_network(networks=arena['networks'], build_id=unit_id)
        state_transition(entity=unit, new_state=BUILD_STATES.COMPLETED_ARENA_NETWORKS)

    # STATE: BUILDING_ARENA_SERVERS
    if check_ordered_arenas_state(unit, BUILD_STATES.BUILDING_ARENA_SERVERS):
        state_transition(entity=unit, new_state=BUILD_STATES.BUILDING_ARENA_SERVERS)
        print('Creating additional servers')
        # Default internal addresses are handed out as 10.1.0.<i>, starting at 101, one per server.
        for i, server in enumerate(arena['servers'], start=101):
            server_name = "%s-%s" % (unit_id, server['name'])
            sshkey = server["sshkey"]
            tags = server['tags']
            machine_type = server["machine_type"]
            network_routing = server["network_routing"]
            # If a nic is not specified, then add the server to the student-network.
            if server['nics']:
                nics = []
                for n in server['nics']:
                    # Fill in missing values on the spec itself, as the stored spec is reused later.
                    n.setdefault('network', student_network_name)
                    n.setdefault('internal_IP', f'10.1.0.{i}')
                    n.setdefault('subnet', 'default')
                    n.setdefault('external_NAT', False)
                    nics.append({
                        "network": "%s-%s" % (unit_id, n['network']),
                        "internal_IP": n['internal_IP'],
                        "subnet": "%s-%s-%s" % (unit_id, n['network'], n['subnet']),
                        "external_NAT": n['external_NAT']
                    })
            else:
                nics = [
                    {
                        "network": "%s-%s" % (unit_id, student_network_name),
                        "internal_IP": f'10.1.0.{i}',
                        "subnet": "%s-%s-%s" % (unit_id, student_network_name, 'default'),
                        "external_NAT": False
                    }
                ]

            create_instance_custom_image(compute=compute, workout=unit_id, name=server_name,
                                         custom_image=server['image'], machine_type=machine_type,
                                         networkRouting=network_routing, networks=nics, tags=tags,
                                         meta_data=None, sshkey=sshkey)
        state_transition(entity=unit, new_state=BUILD_STATES.COMPLETED_ARENA_SERVERS)

    # STATE: BUILDING_ROUTES
    if check_ordered_arenas_state(unit, BUILD_STATES.BUILDING_ROUTES):
        state_transition(entity=unit, new_state=BUILD_STATES.BUILDING_ROUTES)
        print('Creating network routes and firewall rules')
        if 'routes' in arena and arena['routes']:
            for route in arena['routes']:
                r = {"name": "%s-%s" % (unit_id, route['name']),
                     "network": "%s-%s" % (unit_id, route['network']),
                     "destRange": route['dest_range'],
                     "nextHopInstance": "%s-%s" % (unit_id, route['next_hop_instance'])}
                # Pass the unit-prefixed route, not the raw specification entry.
                create_route(r)
        state_transition(entity=unit, new_state=BUILD_STATES.COMPLETED_ROUTES)

    # STATE: BUILDING_FIREWALL
    if check_ordered_arenas_state(unit, BUILD_STATES.BUILDING_FIREWALL):
        state_transition(entity=unit, new_state=BUILD_STATES.BUILDING_FIREWALL)
        firewall_rules = []
        for rule in arena['firewall_rules']:
            # Rules without an explicit network apply to the student network.
            rule.setdefault('network', student_network_name)
            firewall_rules.append({"name": "%s-%s" % (unit_id, rule['name']),
                                   "network": "%s-%s" % (unit_id, rule['network']),
                                   "targetTags": rule['target_tags'],
                                   "protocol": rule['protocol'],
                                   "ports": rule['ports'],
                                   "sourceRanges": rule['source_ranges']})

        # Create the default rules to allow traffic between student networks.
        firewall_rules.append({"name": "%s-%s" % (unit_id, 'allow-all-internal'),
                               "network": "%s-%s" % (unit_id, student_network_name),
                               "targetTags": [],
                               "protocol": 'tcp',
                               "ports": ['tcp/any', 'udp/any', 'icmp/any'],
                               "sourceRanges": [student_network_subnet]})

        create_firewall_rules(firewall_rules)
        state_transition(entity=unit, new_state=BUILD_STATES.COMPLETED_FIREWALL)

    state_transition(entity=unit, new_state=BUILD_STATES.READY)
0
def build_student_servers(unit_id, workouts, student_entry_server, student_entry_type, student_entry_username,
                          student_entry_password, network_type):
    """
    Builds the student servers for the arena. In the arena, all student servers have the same configuration.
    However, they may be on distinct networks or the same network.

    Both stages are guarded by check_ordered_arenas_state, so the network stage may be skipped on a call
    that still has to build servers. The network and subnet names are therefore derived from the IDs each
    time they are needed instead of being carried over from the network stage.

    :param unit_id: Unit ID for the Arena build. This is used to pull data from the datastore
    :param workouts: An array of workout_ids for this arena.
    :param student_entry_server: The name of the server used for students to access the arena
    :param student_entry_type: Either vnc or rdp
    :param student_entry_username: The username for the server or None for vnc
    :param student_entry_password: The password for the server
    :param network_type: Either a same network for all students or each student gets their distinct workout.
    :return: None
    :raises ValueError: If a student server has to be placed and network_type is neither 'same' nor 'distinct'.
    """
    def student_network_names(build_id):
        # Returns the (network, subnet) names of the student network owned by build_id.
        network = "%s-%s" % (build_id, student_network_name)
        return network, "%s-%s" % (network, 'default')

    unit = ds_client.get(ds_client.key('cybergym-unit', unit_id))
    distinct_networks = network_type == 'distinct'

    # Work out the naming up front so that it is available even when the network stage below is skipped.
    shared_network = shared_subnet = None
    guac_network = ''
    if network_type == 'same':
        shared_network, shared_subnet = student_network_names(unit_id)
        guac_network = shared_network
    elif distinct_networks:
        # The entry server is attached to the first workout's network.
        first_workout_id = next(iter(workouts), None)
        if first_workout_id is not None:
            guac_network = student_network_names(first_workout_id)[0]

    # STATE: BUILDING_ARENA_NETWORKS
    if check_ordered_arenas_state(unit, BUILD_STATES.BUILDING_ARENA_STUDENT_NETWORKS):
        state_transition(entity=unit, new_state=BUILD_STATES.BUILDING_ARENA_STUDENT_NETWORKS)
        if network_type == 'same':
            # All of the student servers share a single network
            build_student_network(build_id=unit_id, subnet=student_network_subnet)
        elif distinct_networks:
            # One /24 per workout: 10.1.<workout index>.0/24
            for i, workout_id in enumerate(workouts):
                build_student_network(build_id=workout_id, subnet='10.1.' + str(i) + '.0/24')
        state_transition(entity=unit, new_state=BUILD_STATES.COMPLETED_ARENA_STUDENT_NETWORKS)

    # STATE: BUILDING_ARENA_SERVERS
    # The IP address of each server is dynamically assigned as 10.1.i.j
    if check_ordered_arenas_state(unit, BUILD_STATES.BUILDING_ARENA_STUDENT_SERVERS):
        state_transition(entity=unit, new_state=BUILD_STATES.BUILDING_ARENA_STUDENT_SERVERS)
        network_name, subnet_name = shared_network, shared_subnet
        i = 0
        j = 2
        guacamole_connections = []
        for workout_index, workout_id in enumerate(workouts):
            workout_key = ds_client.key('cybergym-workout', workout_id)
            workout = ds_client.get(workout_key)
            if distinct_networks:
                # Each workout lives in its own 10.1.<index>.0/24 network, so host numbering restarts.
                network_name, subnet_name = student_network_names(workout_id)
                i = workout_index
                j = 2

            for server in workout['student_servers']:
                if network_name is None:
                    raise ValueError(
                        "Unsupported network_type %r for arena %s" % (network_type, unit_id))
                internal_ip_address = f'10.1.{i}.{j}'
                if server['name'] == student_entry_server:
                    guacamole_connections.append({
                        'workout_id': workout_id,
                        'entry_type': student_entry_type,
                        'ip': internal_ip_address,
                        'username': student_entry_username,
                        'password': student_entry_password
                    })
                server_name = "%s-%s" % (workout_id, server['name'])
                nics = [{
                    "network": network_name,
                    "internal_IP": internal_ip_address,
                    "subnet": subnet_name,
                    "external_NAT": False
                }]
                create_instance_custom_image(compute=compute, workout=workout_id, name=server_name,
                                             custom_image=server['image'], machine_type=server["machine_type"],
                                             networkRouting=server["network_routing"], networks=nics,
                                             tags=server['tags'], meta_data=None, sshkey=server["sshkey"])
                j += 1
        # Build the workout entry server and create the firewall rule to make it accessible.
        build_guacamole_server(build=unit, network=guac_network,
                               guacamole_connections=guacamole_connections)
        # Get the unit datastore entry again because building the guacamole server adds credentials.
        unit = ds_client.get(ds_client.key('cybergym-unit', unit_id))
        state_transition(entity=unit, new_state=BUILD_STATES.COMPLETED_ARENA_STUDENT_SERVERS)
# Example no. 24
# 0
def server_build(server_name):
    """
    Builds an individual server based on the specification in the Datastore entity with name server_name.

    The instance insert is retried up to 5 times on BrokenPipeError, and the wait on the resulting
    operation is retried up to 5 times on timeout. Once built, a student entry server gets its DNS
    record registered, and the server is then stopped.

    :param server_name: The Datastore entity name of the server to build
    :return: A boolean status on the success of the build
    """
    server = ds_client.get(ds_client.key('cybergym-server', server_name))
    build_id = server['workout']
    g_logger = log_client.logger(str(server_name))
    state_transition(entity=server, new_state=SERVER_STATES.BUILDING)
    config = server['config'].copy()
    is_student_entry = bool(server.get('student_entry'))
    # Currently, we need a workaround to insert the guacamole startup script because of a 1500 character limit on
    # indexed fields. The exclude_from_index does not work on embedded datastore fields
    if is_student_entry:
        config['metadata'] = {
            'items': [{
                "key": "startup-script",
                "value": server['guacamole_startup_script']
            }]
        }

    # Begin the server build and keep trying for a bounded number of additional cycles
    i = 0
    build_success = False
    response = None
    while not build_success and i < 5:
        workout_globals.refresh_api()
        try:
            if server.get('add_disk'):
                try:
                    image_config = {
                        "name": server_name + "-disk",
                        "sizeGb": server['add_disk'],
                        "type": "projects/" + project + "/zones/" + zone + "/diskTypes/pd-ssd"
                    }
                    disk_response = compute.disks().insert(
                        project=project, zone=zone,
                        body=image_config).execute()
                    compute.zoneOperations().wait(
                        project=project, zone=zone,
                        operation=disk_response["id"]).execute()
                except HttpError as err:
                    # If the disk already exists (i.e. a nuke), then ignore. Anything else is reported
                    # instead of being dropped silently; the instance build is still attempted.
                    if err.resp.status != 409:
                        cloud_log(
                            build_id,
                            f"Error when trying to build the disk for {server_name}: {err}",
                            LOG_LEVELS.WARNING)
            if server['build_type'] == BUILD_TYPES.MACHINE_IMAGE:
                source_machine_image = f"projects/{project}/global/machineImages/{server['machine_image']}"
                compute_beta = discovery.build('compute', 'beta')
                response = compute_beta.instances().insert(
                    project=project,
                    zone=zone,
                    body=config,
                    sourceMachineImage=source_machine_image).execute()
            else:
                if "delayed_start" in server and server["delayed_start"]:
                    time.sleep(30)
                response = compute.instances().insert(project=project,
                                                      zone=zone,
                                                      body=config).execute()
            build_success = True
            g_logger.log_text(
                f'Sent job to build {server_name}, and waiting for response')
        except BrokenPipeError:
            i += 1
        except HttpError as exception:
            cloud_log(
                build_id,
                f"Error when trying to build {server_name}: {exception.reason}",
                LOG_LEVELS.ERROR)
            return False

    if not build_success:
        # Every attempt hit a broken pipe, so there is no operation to wait on.
        g_logger.log_text(
            f'Unable to send the build job for server {server_name} after {i} attempts')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    i = 0
    success = False
    while not success and i < 5:
        try:
            g_logger.log_text(
                f"Begin waiting for build operation {response['id']}")
            compute.zoneOperations().wait(project=project,
                                          zone=zone,
                                          operation=response["id"]).execute()
            success = True
        except timeout:
            i += 1
            g_logger.log_text('Response timeout for build. Trying again')

    if not success:
        g_logger.log_text(f'Timeout in trying to build server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    g_logger.log_text(f'Successfully built server {server_name}')
    state_transition(entity=server,
                     new_state=SERVER_STATES.RUNNING,
                     existing_state=SERVER_STATES.BUILDING)

    # If this is a student entry server, register the DNS
    if is_student_entry:
        g_logger.log_text(f'Setting DNS record for {server_name}')
        ip_address = register_student_entry(server['workout'], server_name)
        server['external_ip'] = ip_address
        ds_client.put(server)
        server = ds_client.get(ds_client.key('cybergym-server', server_name))

    # Now stop the server before completing
    g_logger.log_text(f'Stopping {server_name}')
    compute.instances().stop(project=project, zone=zone,
                             instance=server_name).execute()
    state_transition(entity=server, new_state=SERVER_STATES.STOPPED)

    # If no other servers are building, then set the workout to the state of READY.
    check_build_state_change(build_id=build_id,
                             check_server_state=SERVER_STATES.STOPPED,
                             change_build_state=BUILD_STATES.READY)
    return True
def build_workout(workout_id):
    """
    Builds a workout compute environment according to the specification referenced in the datastore with key workout_id

    The build runs as a sequence of stages (networks, servers, routes, firewall). Each stage is guarded by
    check_ordered_workout_state, so calling this again for a partially built workout skips the stages that
    check reports as not needed.

    :param workout_id: The workout_id key in the datastore holding the build specification
    :return: None
    :raises LookupError: If no datastore record exists for workout_id
    """
    key = ds_client.key('cybergym-workout', workout_id)
    workout = ds_client.get(key)
    # This can sometimes happen when debugging a workout ID and the Datastore record no longer exists.
    if not workout:
        cloud_log(workout_id,
                  f"The datastore record for {workout_id} no longer exists!",
                  LOG_LEVELS.ERROR)
        raise LookupError

    if 'state' not in workout or not workout['state']:
        state_transition(entity=workout, new_state=BUILD_STATES.START)

    # Create the networks and subnets
    if check_ordered_workout_state(workout, BUILD_STATES.BUILDING_NETWORKS):
        state_transition(entity=workout,
                         new_state=BUILD_STATES.BUILDING_NETWORKS)
        for network in workout['networks']:
            network_name = f"{workout_id}-{network['name']}"
            cloud_log(workout_id,
                      f"Building network {network_name}",
                      LOG_LEVELS.INFO)
            network_body = {
                "name": network_name,
                "autoCreateSubnetworks": False,
                "region": region
            }
            try:
                response = compute.networks().insert(
                    project=project, body=network_body).execute()
                compute.globalOperations().wait(
                    project=project, operation=response["id"]).execute()
                time.sleep(10)
            except HttpError as err:
                # If the network already exists, then this may be a rebuild and ignore the error.
                # Any other error is logged rather than dropped; the build still carries on.
                if err.resp.status != 409:
                    cloud_log(workout_id,
                              f"Error when building network {network_name}: {err}",
                              LOG_LEVELS.ERROR)
            for subnet in network['subnets']:
                subnetwork_name = f"{network_name}-{subnet['name']}"
                cloud_log(workout_id,
                          f"Building the subnetwork {subnetwork_name}",
                          LOG_LEVELS.INFO)
                subnetwork_body = {
                    "name": subnetwork_name,
                    "network": "projects/%s/global/networks/%s" % (project, network_name),
                    "ipCidrRange": subnet['ip_subnet']
                }
                try:
                    response = compute.subnetworks().insert(
                        project=project, region=region,
                        body=subnetwork_body).execute()
                    compute.regionOperations().wait(
                        project=project,
                        region=region,
                        operation=response["id"]).execute()
                except HttpError as err:
                    # If the subnetwork already exists, then this may be a rebuild and ignore the error
                    if err.resp.status != 409:
                        cloud_log(workout_id,
                                  f"Error when building the subnetwork {subnetwork_name}: {err}",
                                  LOG_LEVELS.ERROR)
        # Only mark the stage complete once every network has been processed. Doing this inside the loop
        # would record completion after the first network and never for an empty network list.
        state_transition(entity=workout,
                         new_state=BUILD_STATES.COMPLETED_NETWORKS)

    # Now create the server configurations
    if check_ordered_workout_state(workout, BUILD_STATES.BUILDING_SERVERS):
        state_transition(entity=workout,
                         new_state=BUILD_STATES.BUILDING_SERVERS)
        pubsub_topic = PUBSUB_TOPICS.MANAGE_SERVER
        publisher = pubsub_v1.PublisherClient()
        topic_path = publisher.topic_path(project, pubsub_topic)
        for server in workout['servers']:
            server_name = f"{workout_id}-{server['name']}"
            cloud_log(workout_id,
                      f"Sending pubsub message to build {server_name}",
                      LOG_LEVELS.INFO)
            publisher.publish(topic_path,
                              data=b'Server Build',
                              server_name=server_name,
                              action=SERVER_ACTIONS.BUILD)
        # Also build the student entry server for the workout
        publisher.publish(topic_path,
                          data=b'Server Build',
                          server_name=f"{workout_id}-student-guacamole",
                          action=SERVER_ACTIONS.BUILD)
        state_transition(entity=workout,
                         new_state=BUILD_STATES.COMPLETED_SERVERS)

    # Create all of the network routes and firewall rules
    if check_ordered_workout_state(workout, BUILD_STATES.BUILDING_ROUTES):
        state_transition(entity=workout,
                         new_state=BUILD_STATES.BUILDING_ROUTES)
        cloud_log(
            workout_id,
            f"Creating network routes and firewall rules for {workout_id}",
            LOG_LEVELS.INFO)
        # NOTE(review): no COMPLETED_ROUTES transition happens in this function; presumably
        # workout_route_setup or a later stage covers it - confirm.
        if 'routes' in workout and workout['routes']:
            workout_route_setup(workout_id)

    if check_ordered_workout_state(workout, BUILD_STATES.BUILDING_FIREWALL):
        state_transition(entity=workout,
                         new_state=BUILD_STATES.BUILDING_FIREWALL)
        # Rule and network names are prefixed with the workout ID.
        firewall_rules = [{
            "name": "%s-%s" % (workout_id, rule['name']),
            "network": "%s-%s" % (workout_id, rule['network']),
            "targetTags": rule['target_tags'],
            "protocol": rule['protocol'],
            "ports": rule['ports'],
            "sourceRanges": rule['source_ranges']
        } for rule in workout['firewall_rules']]
        create_firewall_rules(firewall_rules)
        state_transition(entity=workout,
                         new_state=BUILD_STATES.COMPLETED_FIREWALL)

    cloud_log(
        workout_id,
        f"Finished the build process with a final state: {workout['state']}",
        LOG_LEVELS.INFO)
# Example no. 26
# 0
 def _delete_specific_arena(self):
     """
     Delete an arena identified by self.build_id.

     Arenas hold resources at the unit level as well as per-workout resources. Unit-level resources
     are removed through this object's own _delete_* methods, while per-workout resources are removed
     by publishing one delete message per workout. Fixed sleeps separate the phases.
     :return: True once every delete step has been issued
     """
     arena_id = self.build_id
     unit = ds_client.get(ds_client.key('cybergym-unit', arena_id))
     cloud_log(arena_id, f"Deleting arena {arena_id}", LOG_LEVELS.INFO)

     # The arena external_ip is tied to the <unit_id>_student_entry server, so that
     # entity has to be looked up before the matching DNS record can be deleted.
     try:
         server_query = ds_client.query(kind='cybergym-server')
         server_query.add_filter("workout", "=", arena_id)
         arena_servers = list(server_query.fetch())
         if arena_servers:
             delete_dns(arena_id, arena_servers[0]["external_ip"])
     except HttpError:
         cloud_log(arena_id,
                   f"DNS record does not exist for arena {arena_id}",
                   LOG_LEVELS.WARNING)
     except KeyError:
         cloud_log(arena_id,
                   f"No external IP address for arena {arena_id}",
                   LOG_LEVELS.WARNING)

     delete_topic = PUBSUB_TOPICS.DELETE_EXPIRED
     publisher = pubsub_v1.PublisherClient()
     topic_path = publisher.topic_path(project, delete_topic)

     def request_delete(target_id, delete_type):
         # Publish a single arena delete message for target_id.
         publisher.publish(topic_path,
                           data=b'Workout Delete',
                           workout_type=WORKOUT_TYPES.ARENA,
                           workout_id=target_id,
                           arena_workout_delete_type=delete_type)

     def request_delete_for_workouts(delete_type):
         # Publish the same delete message once per workout in the arena.
         for workout_id in unit['workouts']:
             request_delete(workout_id, delete_type)

     # Phase 1: the student arena servers, then the guacamole server keyed by the arena unit_id.
     request_delete_for_workouts(ArenaWorkoutDeleteType.SERVER)
     request_delete(arena_id, ArenaWorkoutDeleteType.SERVER)
     time.sleep(60)

     # Phase 2: firewall rules, first at the unit level and then per workout.
     self._delete_firewall_rules()
     request_delete_for_workouts(ArenaWorkoutDeleteType.FIREWALL_RULES)
     time.sleep(30)

     # Phase 3: routes and subnetworks.
     self._delete_routes()
     self._delete_subnetworks()
     request_delete_for_workouts(ArenaWorkoutDeleteType.ROUTES)
     time.sleep(30)

     # Phase 4: the networks themselves.
     self._delete_network()
     request_delete_for_workouts(ArenaWorkoutDeleteType.NETWORK)
     return True
# Example no. 27
# 0
def _medic_local_workouts(query):
    """Yield the entities returned by query that are standard workouts built in this project."""
    for workout in list(query.fetch()):
        if workout.get('build_project_location', project) != project:
            continue
        if get_workout_type(workout) == WORKOUT_TYPES.WORKOUT:
            yield workout


def _medic_workouts_in_state(state):
    """Yield this project's standard workouts whose datastore state equals state."""
    query = ds_client.query(kind='cybergym-workout')
    query.add_filter("state", "=", state)
    return _medic_local_workouts(query)


def _medic_state_older_than(workout, seconds):
    """Return True when the workout's last state change is more than the given seconds in the past."""
    # The comparison is done on strings, exactly as before; presumably state-timestamp is stored as
    # an epoch-seconds string - confirm against state_transition.
    return workout['state-timestamp'] < str(calendar.timegm(time.gmtime()) - seconds)


def _medic_force_stop(workout):
    """Mark the workout as RUNNING and then stop it."""
    state_transition(workout, new_state=BUILD_STATES.RUNNING)
    stop_workout(workout.key.name)


def _medic_existing_instance_names():
    """Return the set of names of all compute instances in the project zone, following pagination."""
    names = set()
    page = compute.instances().list(project=project, zone=zone).execute()
    while True:
        names.update(instance['name'] for instance in page.get('items') or [])
        page_token = page.get('nextPageToken')
        if not page_token:
            return names
        page = compute.instances().list(project=project,
                                        zone=zone,
                                        pageToken=page_token).execute()


def medic():
    """
    Reviews the state of all active workouts in the project and attempts to correct any which may have an invalid
    state. Invalid states often occur due to timeouts in processing the Google Cloud Functions.

    Only standard workouts built in this project are touched. The function, in order:
      * resumes builds which stopped part way through, and deletes records which have no state at all
      * stops workouts stuck in COMPLETED_FIREWALL, GUACAMOLE_SERVER_LOAD_TIMEOUT, STARTING or STOPPING
      * nukes again any workout stuck in NUKING
      * publishes a build request for every server of a READY workout with no matching compute instance

    :returns: None
    """
    g_logger = log_client.logger('workout-actions')
    g_logger.log_text("MEDIC: Running Medic function")
    #
    # Fixing build timeout issues
    #
    # The add_filter does not have a != operator. This provides an equivalent results for active workouts.
    query_current_workouts = ds_client.query(kind='cybergym-workout')
    query_current_workouts.add_filter('active', '=', True)
    for workout in _medic_local_workouts(query_current_workouts):
        if 'state' in workout:
            build_state = workout['state']
            # if the workout state has not completed, then attempt to continue rebuilding the workout from where
            # it left off.
            if build_state in ordered_workout_build_states:
                g_logger.log_text(
                    "MEDIC: Workout {} is in a build state of {}. Attempting to fix..."
                    .format(workout.key.name, build_state))
                build_workout(workout_id=workout.key.name)
        elif isinstance(workout, datastore.entity.Entity):
            # If there is no state, then this is not a valid workout, and we can delete the Datastore entity.
            g_logger.log_text(
                "Invalid workout specification in the datastore for workout ID: {}. Deleting the record."
                .format(workout.key.name))
            ds_client.delete(workout.key)
    #
    # Fixing workouts in state COMPLETED_FIREWALL. This may occur when the firewall gets built after the guacamole server
    #
    for workout in _medic_workouts_in_state(BUILD_STATES.COMPLETED_FIREWALL):
        # Only transition the state if the last state change occurred over 5 minutes ago.
        if _medic_state_older_than(workout, 300):
            g_logger.log_text(
                "MEDIC: Workout {} stuck in firewall completion. Changing state to READY"
                .format(workout.key.name))
            _medic_force_stop(workout)
    #
    # Fixing workouts in state GUACAMOLE_SERVER_TIMEOUT. This may occur waiting for the guacamole server to come up
    #
    for workout in _medic_workouts_in_state(BUILD_STATES.GUACAMOLE_SERVER_LOAD_TIMEOUT):
        if _medic_state_older_than(workout, 900):
            # The state change is over 15 minutes old: stop the workout
            g_logger.log_text(
                "MEDIC: Workout {} stuck in guacamole timeout. Changing state to READY"
                .format(workout.key.name))
            _medic_force_stop(workout)
        else:
            # A more recent timeout is set straight to READY without stopping the workout
            g_logger.log_text(
                "MEDIC: Workout {} stuck in guacamole timeout. Changing state to READY"
                .format(workout.key.name))
            print(
                f"Workout {workout.key.name} stuck in guacamole timeout. Changing state to READY"
            )
            state_transition(workout, new_state=BUILD_STATES.READY)

    #
    # Fixing workouts in the state of STARTING. This may occur after a timeout in starting workouts.
    #
    for workout in _medic_workouts_in_state(BUILD_STATES.STARTING):
        # Only transition the state if the last state change occurred over 5 minutes ago.
        if _medic_state_older_than(workout, 300):
            g_logger.log_text(
                "MEDIC: Workout {} stuck in a STARTING state. Stopping the workout."
                .format(workout.key.name))
            _medic_force_stop(workout)

    #
    # Fixing workouts in the state of STOPPING. This may occur after a timeout in stopping workouts.
    #
    for workout in _medic_workouts_in_state(BUILD_STATES.STOPPING):
        # Only transition the state if the last state change occurred over 5 minutes ago.
        if _medic_state_older_than(workout, 300):
            g_logger.log_text(
                "MEDIC: Workout {} stuck in a STOPPING state. Stopping the workout."
                .format(workout.key.name))
            _medic_force_stop(workout)

    #
    # Fixing workouts in the state of NUKING. This may occur after a timeout in deleting the workouts.
    #
    for workout in _medic_workouts_in_state(BUILD_STATES.NUKING):
        # Only transition the state if the last state change occurred over 5 minutes ago.
        if _medic_state_older_than(workout, 300):
            g_logger.log_text(
                "MEDIC: Workout {} stuck in a NUKING state. Attempting to nuke again."
                .format(workout.key.name))
            nuke_workout(workout.key.name)

    #
    # Fixing machines that did not get built
    #
    query_rebuild = ds_client.query(kind='cybergym-workout')
    query_rebuild.add_filter('state', '=', BUILD_STATES.READY)
    query_rebuild.add_filter('build_project_location', '=', project)
    ready_workouts = list(query_rebuild.fetch())
    existing_instances = _medic_existing_instance_names()

    list_missing = []
    for workout in ready_workouts:
        unit_id = workout.get('unit_id')
        unit = ds_client.get(ds_client.key('cybergym-unit', unit_id)) if unit_id else None
        if not unit:
            # A workout without a unit record cannot be classified; skip it instead of aborting the run.
            cloud_log('Medic',
                      f'No unit record found for workout {workout.key.name}',
                      LOG_LEVELS.WARNING)
            continue
        build_type = unit.get('build_type')
        if build_type == 'arena':
            expected_servers = workout['student_servers']
        elif build_type == 'compute':
            expected_servers = workout['servers']
        else:
            continue
        for server in expected_servers:
            datastore_server_name = workout.key.name + '-' + server['name']
            if datastore_server_name not in existing_instances:
                list_missing.append(datastore_server_name)

    cloud_log('Medic', f'Missing servers{list_missing}', LOG_LEVELS.INFO)

    if list_missing:
        # One publisher is enough for every rebuild request.
        publisher = pubsub_v1.PublisherClient()
        topic_path = publisher.topic_path(project, PUBSUB_TOPICS.MANAGE_SERVER)
        for server in list_missing:
            cloud_log('Medic', 'Rebuilding server{}'.format(server),
                      LOG_LEVELS.INFO)
            publisher.publish(topic_path,
                              data=b'Server Build',
                              server_name=server,
                              action=SERVER_ACTIONS.BUILD)

    return
# Example no. 28
# 0
def build_guacamole_server(build, network, guacamole_connections):
    """
    Builds an image with an Apache Guacamole server and adds startup scripts to insert the
    correct users and connections into the guacamole database. This server becomes the entrypoint
    for all students in the arena.

    For every connection a guacamole user with a random password is generated and stored on the matching
    workout datastore record. After the server is requested, a firewall rule is created that opens
    tcp/80,8080,443 to the instances tagged 'student-entry'.

    :param build: Build Entity for the workout or arena.
    :param network: The network name for the server
    :param guacamole_connections: An array of dictionaries for each student with the keys workout_id,
        entry_type ('vnc', anything else is treated as rdp), ip, username, password and, optionally,
        security-mode for rdp connections.
    :return: None
    """
    build_id = build.key.name
    if not guacamole_connections:
        return None

    startup_script = workout_globals.guac_startup_begin.format(
        guacdb_password=guac_password)
    for user_number, connection in enumerate(guacamole_connections, start=1):
        # Get a PRNG password for the workout and store it with the datastore record for display on the workout controller
        guac_user = '******' + str(user_number)
        guac_connection_password = get_random_alphaNumeric_string()
        workout = ds_client.get(
            ds_client.key('cybergym-workout', connection['workout_id']))
        workout['workout_user'] = guac_user
        workout['workout_password'] = guac_connection_password
        ds_client.put(workout)

        # Escape the characters which would otherwise be interpreted when the password is embedded in the
        # startup script. The backslashes are written explicitly: "\'" is just a quote, so the previous
        # quote replacement never changed anything.
        safe_password = connection['password'].replace('$', '\\$')
        safe_password = safe_password.replace("'", "\\'")
        startup_script += workout_globals.guac_startup_user_add.format(
            user=guac_user,
            name=guac_user,
            guac_password=guac_connection_password)
        if connection['entry_type'] == 'vnc':
            startup_script += workout_globals.guac_startup_vnc.format(
                ip=connection['ip'],
                connection=connection['workout_id'],
                vnc_password=safe_password)
        else:
            # Not every caller supplies a security mode (the arena student connections do not), so fall
            # back to Guacamole's negotiate-automatically mode.
            # NOTE(review): confirm 'any' is the desired default for this deployment.
            startup_script += workout_globals.guac_startup_rdp.format(
                ip=connection['ip'],
                connection=connection['workout_id'],
                rdp_username=connection['username'],
                rdp_password=safe_password,
                security_mode=connection.get('security-mode', 'any'))
        startup_script += workout_globals.guac_startup_join_connection_user
    startup_script += workout_globals.guac_startup_end

    server_name = "%s-%s" % (build_id, 'student-guacamole')
    tags = {'items': ['student-entry']}
    nics = [{
        "network": network,
        "subnet": "%s-%s" % (network, 'default'),
        "external_NAT": True
    }]
    meta_data = {"key": "startup-script", "value": startup_script}
    try:
        create_instance_custom_image(compute=compute,
                                     workout=build_id,
                                     name=server_name,
                                     custom_image=student_entry_image,
                                     machine_type='n1-standard-1',
                                     networkRouting=False,
                                     networks=nics,
                                     tags=tags,
                                     meta_data=meta_data,
                                     sshkey=None,
                                     student_entry=True)

        # Create the firewall rule allowing external access to the guacamole connection
        allow_entry = [{
            "name": "%s-%s" % (build_id, 'allow-student-entry'),
            "network": network,
            "targetTags": ['student-entry'],
            'protocol': None,
            'ports': ['tcp/80,8080,443'],
            'sourceRanges': ['0.0.0.0/0']
        }]
        create_firewall_rules(allow_entry)
    except errors.HttpError as err:
        # 409 error means the server already exists; anything else is passed on to the caller.
        if err.resp.status != 409:
            raise
# Example no. 29
# 0
def server_build(server_name):
    """
    Builds an individual server based on the specification in the Datastore entity with name server_name.

    The instance insert request is retried when the connection breaks, and the wait on the resulting
    zone operation is retried on timeout. Once built, the server is stopped again and the parent
    build is checked for a state change.

    :param server_name: The Datastore entity name of the server to build
    :return: True when the server was built and stopped, False when the build request could not be
        sent or the build operation timed out (the server is marked BROKEN in both failure cases)
    """
    max_attempts = 5

    print(f'Building server {server_name}')
    server = ds_client.get(ds_client.key('cybergym-server', server_name))
    state_transition(entity=server, new_state=SERVER_STATES.BUILDING)

    # Commented because this is only for Fortinet right now.
    # if 'canIPForward' in server and server['config']['canIpForward']:
    #     image_config = {"name": server_name + "-disk", "sizeGb": 30,
    #                     "type": "projects/" + project + "/zones/" + zone + "/diskTypes/pd-ssd"}
    #     response = compute.disks().insert(project=project, zone=zone, body=image_config).execute()
    #     compute.zoneOperations().wait(project=project, zone=zone, operation=response["id"]).execute()

    # Send the build request, retrying a bounded number of times on a broken connection.
    response = None
    for _ in range(max_attempts):
        workout_globals.refresh_api()
        try:
            response = compute.instances().insert(project=project, zone=zone, body=server['config']).execute()
        except BrokenPipeError:
            continue
        print(f'Sent job to build {server_name}, and waiting for response')
        break
    else:
        # Every attempt failed, so there is no operation to wait on. Bail out here instead of
        # touching an unbound response below.
        print(f'Unable to send build request for server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    # Wait for the build operation to finish, retrying a bounded number of times on timeout.
    success = False
    for _ in range(max_attempts):
        try:
            print(f"Begin waiting for build operation {response['id']}")
            compute.zoneOperations().wait(project=project, zone=zone, operation=response["id"]).execute()
        except timeout:
            print('Response timeout for build. Trying again')
            continue
        success = True
        break

    if not success:
        print(f'Timeout in trying to build server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    print(f'Successfully built server {server_name}')
    state_transition(entity=server, new_state=SERVER_STATES.RUNNING, existing_state=SERVER_STATES.BUILDING)

    # If this is a student entry server, register the DNS
    if server.get('student_entry'):
        print(f'Setting DNS record for {server_name}')
        ip_address = register_student_entry(server['workout'], server_name)
        server['external_ip'] = ip_address
        ds_client.put(server)
        # Re-read the entity after the write so later state changes act on a fresh copy.
        server = ds_client.get(ds_client.key('cybergym-server', server_name))

    # Now stop the server before completing
    print(f'Stopping {server_name}')
    compute.instances().stop(project=project, zone=zone, instance=server_name).execute()
    state_transition(entity=server, new_state=SERVER_STATES.STOPPED)

    # If no other servers are building, then set the workout to the state of READY.
    build_id = server['workout']
    check_build_state_change(build_id=build_id, check_server_state=SERVER_STATES.STOPPED,
                             change_build_state=BUILD_STATES.READY)
    return True
Exemplo n.º 30
0
def server_delete(server_name):
    """
    Deletes the compute instance for a server and records the result in its Datastore entity.

    :param server_name: The Datastore entity name (and compute instance name) of the server to delete
    :return: True when the server is missing from the datastore, is already marked DELETED, or was
        deleted by this call; False when waiting for the delete operation timed out on every attempt
        (the server is then marked BROKEN)
    """
    max_attempts = 5
    g_logger = log_client.logger(str(server_name))

    server_list = list(
        ds_client.query(kind='cybergym-server').add_filter(
            'name', '=', str(server_name)).fetch())
    if not server_list:
        g_logger.log_text(
            f'Server of name "{server_name}" does not exist in datastore, unable to Delete.')
        return True

    # Only ask for the DELETED state once we know the server exists at all.
    server_is_deleted = list(
        ds_client.query(kind='cybergym-server').add_filter(
            'name', '=', str(server_name)).add_filter('state', '=',
                                                      'DELETED').fetch())
    if server_is_deleted:
        g_logger.log_text(f'Server "{server_name}" has already been deleted.')
        return True

    server = ds_client.get(ds_client.key('cybergym-server', server_name))
    state_transition(entity=server, new_state=SERVER_STATES.DELETING)
    # If there are snapshots associated with this server, then delete the snapshots.
    if server.get('snapshot'):
        Snapshot.delete_snapshot(server_name)

    workout_globals.refresh_api()
    instance_gone = False
    try:
        response = compute.instances().delete(project=project,
                                              zone=zone,
                                              instance=server_name).execute()
    except HttpError:
        # The server is treated as already deleted or no longer existing.
        # NOTE(review): this catches every HttpError, not only 404 - confirm that is intended.
        instance_gone = True

    if not instance_gone:
        g_logger.log_text(
            f'Sent delete request to {server_name}, and waiting for response')
        success = False
        for _ in range(max_attempts):
            try:
                g_logger.log_text(
                    f"Begin waiting for delete response from operation {response['id']}"
                )
                compute.zoneOperations().wait(project=project,
                                              zone=zone,
                                              operation=response["id"]).execute()
            except timeout:
                g_logger.log_text(
                    'Response timeout for deleting server. Trying again')
                continue
            success = True
            break
        if not success:
            g_logger.log_text(f'Timeout in trying to delete server {server_name}')
            state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
            return False

        # If this is a student entry server, delete the DNS
        if server.get('student_entry'):
            g_logger.log_text(f'Deleting DNS record for {server_name}')
            ip_address = server['external_ip']
            delete_dns(server['workout'], ip_address)

    state_transition(entity=server, new_state=SERVER_STATES.DELETED)
    g_logger.log_text(f"Finished deleting {server_name}")

    # Let the parent build move to COMPLETED_DELETING_SERVERS; presumably this only happens
    # once all of its servers are DELETED - verify in check_build_state_change.
    build_id = server['workout']
    check_build_state_change(
        build_id=build_id,
        check_server_state=SERVER_STATES.DELETED,
        change_build_state=BUILD_STATES.COMPLETED_DELETING_SERVERS)
    return True