示例#1
0
def apt_installs(manager=False, single_server_ami=False):
    """ Install the apt packages for this server's role on the remote machine.

    The package list is chosen by role: APT_MANAGER_INSTALLS when ``manager`` is truthy,
    otherwise APT_SINGLE_SERVER_AMI_INSTALLS when ``single_server_ami`` is truthy, otherwise
    APT_WORKER_INSTALLS.  Output of the apt commands is appended to LOG_FILE.

    After a successful install the supervisor service is stopped, because supervisor is run
    manually at the end of the deployment.

    Raises:
        Exception: if the installs still fail after 10 attempts.
    """
    if manager:
        apt_install_list = APT_MANAGER_INSTALLS
    elif single_server_ami:
        apt_install_list = APT_SINGLE_SERVER_AMI_INSTALLS
    else:
        apt_install_list = APT_WORKER_INSTALLS
    installs_string = " ".join(apt_install_list)

    # Sometimes (usually on slower servers) the remote server isn't done with initial setup when
    # we get to this step, so it has a bunch of retry logic.
    for _ in range(10):
        try:
            sudo('apt-get -y update >> {log}'.format(log=LOG_FILE))
            sudo('apt-get -y install {installs} >> {log}'.format(
                installs=installs_string, log=LOG_FILE))
        except FabricExecutionError:
            log.warning(
                "WARNING: encountered problems when trying to run apt installs.\n"
                "Usually this means the server is running a software upgrade in the background.\n"
                "Will try 10 times, waiting 5 seconds each time.")
            sleep(5)
        else:
            # both apt commands succeeded, no further attempts needed.
            break
    else:
        # The loop ran out of attempts without a success.  Fail here, before touching
        # supervisor: if the installs failed, the service command below would presumably fail
        # as well (supervisor may not be installed) and hide this clearer error.
        raise Exception("Could not install software on remote machine.")

    # we run supervisor manually at the end
    sudo("service supervisor stop")
def get_manager_instance_by_eb_environment_name(eb_environment_name):
    """ Look up the manager server instance for the given environment.

    Returns the single instance reported by get_instances_by_name for this environment's
    manager name, or None (after logging a warning) when there is no such instance.
    Raises an Exception when more than one manager is found.
    """
    manager_name = PROCESSING_MANAGER_NAME % eb_environment_name
    found = get_instances_by_name(manager_name)

    # more than one manager is treated as a broken deployment, not something to pick from.
    if len(found) > 1:
        msg = (
            "Discovered multiple manager servers. "
            "This configuration is not supported and should be corrected."
        )
        log.error(msg)
        raise Exception(msg)

    if not found:
        log.warning("No manager found.")
        return None

    return found[0]
def setup_rabbitmq(eb_environment_name):
    """ Configure RabbitMQ on the remote server for the given environment.

    Creates the password file, installs the RabbitMQ configuration file, adds the "beiwe"
    RabbitMQ user with full permissions on the "/" vhost, and restarts the rabbitmq-server
    service.
    """
    create_rabbit_mq_password_file(eb_environment_name)

    # Upload the configuration file (so that rabbitmq listens on the configured port), then
    # copy it into its final location as root.
    put(LOCAL_RABBIT_MQ_CONFIG_FILE_PATH, REMOTE_RABBIT_MQ_CONFIG_FILE_PATH)
    install_config_command = (
        f"cp {REMOTE_RABBIT_MQ_CONFIG_FILE_PATH} {REMOTE_RABBIT_MQ_FINAL_CONFIG_FILE_PATH}"
    )
    sudo(install_config_command)

    # Create the beiwe user with this environment's password and give it full access.
    rabbit_mq_password = get_rabbit_mq_password(eb_environment_name)
    add_user_command = f"rabbitmqctl add_user beiwe {rabbit_mq_password}"
    sudo(add_user_command)
    sudo('rabbitmqctl set_permissions -p / beiwe ".*" ".*" ".*"')

    # The restart is slow.  (Original author's note: backgrounding it breaks celery.)
    log.warning("This next command can take quite a while to run.")
    sudo("service rabbitmq-server restart")
示例#4
0
def do_create_worker():
    """ Create and provision a new worker server for an existing environment.

    Prompts for the environment name, requires that a manager server already exists for it,
    creates a processing server of the configured worker instance type, then runs the remote
    setup steps against the new server's public IP address and finally starts supervisord.
    Any failure before the server is created is logged and ends the program via EXIT(1).
    """
    environment_name = prompt_for_extant_eb_environment_name()
    do_fail_if_environment_does_not_exist(environment_name)

    # Workers can only be deployed once the cluster has a manager.
    if get_manager_instance_by_eb_environment_name(environment_name) is None:
        log.error(
            "There is no manager server for the %s cluster, cannot deploy a worker until there is."
            % environment_name
        )
        EXIT(1)

    try:
        server_settings = get_server_configuration_file(environment_name)
    except Exception as error:
        log.error("could not read settings file")
        log.error(error)
        server_settings = None  # keeps the name bound for static analysis
        EXIT(1)

    log.info("creating worker server for %s..." % environment_name)
    try:
        worker = create_processing_server(
            environment_name, server_settings[WORKER_SERVER_INSTANCE_TYPE]
        )
    except Exception as error:
        log.error(error)
        worker = None  # keeps the name bound for static analysis
        EXIT(1)

    # Dig the public IP out of the first private address of the first network interface.
    first_interface = worker['NetworkInterfaces'][0]
    first_address = first_interface['PrivateIpAddresses'][0]
    public_ip = first_address['Association']['PublicIp']

    configure_fabric(environment_name, public_ip)

    # Remote setup steps that take no arguments, run strictly in this order.
    for setup_step in (
        create_swap,
        push_home_directory_files,
        apt_installs,
        load_git_repo,
        setup_python,
    ):
        setup_step()

    push_beiwe_configuration(environment_name)
    push_manager_private_ip_and_password(environment_name)
    setup_worker_cron()
    setup_celery_worker()  # the celery worker setup has to be the last setup step.

    log.warning(
        "Server is almost up.  Waiting 20 seconds to avoid a race condition..."
    )
    sleep(20)
    run("supervisord")
示例#5
0
def manager_fix():
    """ Reboot the remote server, wait for it to come back, then re-enable the swap file. """
    # Background, from the original author's notes:
    # It is unclear what causes this.  The notifications task creates zombie processes that on at
    # least one occasion did not respond to kill -9 commands even when run as the superuser. This
    # occurs on both workers and managers; a 20 second sleep operation fixes it, 10 seconds does
    # not.  (Tested on the slowest server, a t3a.nano, with the swap that is required to run the
    # celery tasks.)

    # Update: it turns out there is an alternate failure mode if you try to do the 20 second
    # wait (which works for workers), which is that all calls to the celery Inspect object
    # block for exceptionally long periods, even when a timeout value is provided. (This behavior
    # has other triggers too, this is just a reliable way to trigger it.)
    # So instead of sleeping, reboot the server.
    try_sudo("shutdown -r now")
    log.warning("rebooting server to fix rabbitmq bugs...")
    sleep(5)
    # NOTE(review): the command is only a shell comment, so this presumably just retries until
    # the rebooted server accepts a command again - confirm against retry().
    retry(run, "# waiting for server to reboot, this might take a while.")

    # we need to re-enable the swap after the reboot, then we can finally start supervisor without
    # creating zombie celery threads.
    sudo("swapon /swapfile")
    # print the swap summary so the re-enabled swap shows up in the output.
    sudo("swapon -s")
示例#6
0
####################################################################################################
##################################### Argument Parsing #############################################
####################################################################################################

if __name__ == "__main__":
    # validate the global configuration file
    if not all(
        (are_aws_credentials_present(), is_global_configuration_valid())):
        EXIT(1)

    # get CLI arguments, see function for details
    arguments = cli_args_validation()

    if arguments.prod:
        log.warning("RUNNING IN PROD MODE")
        PROD_MODE.set(True)

    if arguments.dev:
        if PROD_MODE:
            log.error("You cannot provide -prod and -dev at the same time.")
            EXIT(1)
        DEV_MODE.set(True)
        log.warning("RUNNING IN DEV MODE")

    if arguments.help_setup_new_environment:
        do_help_setup_new_environment()
        EXIT(0)

    if arguments.create_environment:
        do_create_environment()