Example #1
    def error_rerun_handler(self, exc, errors, email, cloud_id):
        from mist.io.methods import notify_user

        if len(errors) < 6:
            return self.result_fresh  # Retry when the result is no longer fresh

        user = user_from_email(email)

        if len(errors) == 6:  # If it hasn't responded for a minute
            notify_user(user,
                        'Cloud %s does not respond' %
                        user.clouds[cloud_id].title,
                        email_notify=False,
                        cloud_id=cloud_id)

        # Keep retrying for 30 minutes
        times = [60, 60, 120, 300, 600, 600]
        index = len(errors) - 6
        if index < len(times):
            return times[index]
        else:  # If cloud still unresponsive disable it & notify user
            with user.lock_n_load():
                user.clouds[cloud_id].enabled = False
                user.save()
            notify_user(user,
                        "Cloud %s disabled after not responding for 30mins" %
                        user.clouds[cloud_id].title,
                        email_notify=True,
                        cloud_id=cloud_id)
            log_event(user.email,
                      'incident',
                      action='disable_cloud',
                      cloud_id=cloud_id,
                      error="Cloud unresponsive")
Example #2
    def error_rerun_handler(self, exc, errors, email, backend_id):
        from mist.io.methods import notify_user

        if len(errors) < 6:
            return self.result_fresh  # Retry when the result is no longer fresh

        user = user_from_email(email)

        if len(errors) == 6:  # If it hasn't responded for a minute
            notify_user(user, 'Backend %s does not respond' % user.backends[backend_id].title,
                        email_notify=False, backend_id=backend_id)

        # Keep retrying for 30 minutes
        times = [60, 60, 120, 300, 600, 600]
        index = len(errors) - 6
        if index < len(times):
            return times[index]
        else: # If backend still unresponsive disable it & notify user
            with user.lock_n_load():
                user.backends[backend_id].enabled = False
                user.save()
            notify_user(user, "Backend %s disabled after not responding for 30mins" % user.backends[backend_id].title,
                        email_notify=True, backend_id=backend_id)
            log_event(user.email, 'incident', action='disable_backend',
                      backend_id=backend_id, error="Backend unresponsive")
Example #3
def create_machine_async(email, backend_id, key_id, machine_name, location_id,
                         image_id, size_id, script, image_extra, disk,
                         image_name, size_name, location_name, ips, monitoring,
                         networks, docker_env, docker_command,
                         script_id=None, script_params=None,
                         quantity=1, persist=False, job_id=None,
                         docker_port_bindings={}, docker_exposed_ports={},
                         hostname='', plugins=None):
    from multiprocessing.dummy import Pool as ThreadPool
    from mist.io.methods import create_machine
    from mist.io.exceptions import MachineCreationError
    log.warn('MULTICREATE ASYNC %d' % quantity)

    if multi_user:
        from mist.core.helpers import log_event
    else:
        log_event = lambda *args, **kwargs: None
    job_id = job_id or uuid.uuid4().hex

    log_event(email, 'job', 'async_machine_creation_started', job_id=job_id,
              backend_id=backend_id, script=script, script_id=script_id,
              script_params=script_params, monitoring=monitoring,
              persist=persist, quantity=quantity)

    THREAD_COUNT = 5
    pool = ThreadPool(THREAD_COUNT)

    names = []
    for i in range(1, quantity+1):
        names.append('%s-%d' % (machine_name,i))

    user = user_from_email(email)
    specs = []
    for name in names:
        specs.append((
            (user, backend_id, key_id, name, location_id, image_id,
             size_id, script, image_extra, disk, image_name, size_name,
             location_name, ips, monitoring, networks, docker_env,
             docker_command, 22, script_id, script_params, job_id),
            {'hostname': hostname, 'plugins': plugins}
        ))

    def create_machine_wrapper(args_kwargs):
        args, kwargs = args_kwargs
        error = False
        try:
            node = create_machine(*args, **kwargs)
        except MachineCreationError as exc:
            error = str(exc)
        except Exception as exc:
            error = repr(exc)
        finally:
            name = args[3]
            log_event(email, 'job', 'machine_creation_finished', job_id=job_id,
                      backend_id=backend_id, machine_name=name, error=error)

    pool.map(create_machine_wrapper, specs)
    pool.close()
    pool.join()
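
The task above fans machine creation out over a small thread pool, and the nested wrapper ensures that one failed creation cannot abort the batch: exceptions are converted to an error string and the finally block logs every attempt. A self-contained sketch of that pattern, taking nothing from mist.io (the work function and its inputs are made up):

from multiprocessing.dummy import Pool as ThreadPool  # thread-backed pool

def work(n):
    if n == 2:
        raise ValueError('boom')  # simulate one failing creation
    return n * n

def wrapper(n):
    error = False
    result = None
    try:
        result = work(n)
    except Exception as exc:
        error = repr(exc)
    finally:
        print('item %s finished, error=%s' % (n, error))  # always logged
    return result, error

pool = ThreadPool(5)
print(pool.map(wrapper, [1, 2, 3]))  # e.g. [(1, False), (None, "ValueError('boom')"), (9, False)]
pool.close()
pool.join()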
Example #4
def create_machine_wrapper(args_kwargs):
    args, kwargs = args_kwargs
    error = False
    try:
        node = create_machine(*args, **kwargs)
    except MachineCreationError as exc:
        error = str(exc)
    except Exception as exc:
        error = repr(exc)
    finally:
        name = args[3]
        log_event(email, 'job', 'machine_creation_finished', job_id=job_id,
                  backend_id=backend_id, machine_name=name, error=error)
Example #5
def post_deploy_steps(self, email, backend_id, machine_id, monitoring, command,
                      key_id=None, username=None, password=None, port=22,
                      script_id='', script_params='', job_id=None,
                      hostname='', plugins=None,
                      post_script_id='', post_script_params=''):
    from mist.io.methods import connect_provider, probe_ssh_only
    from mist.io.methods import notify_user, notify_admin
    from mist.io.methods import create_dns_a_record
    if multi_user:
        from mist.core.methods import enable_monitoring
        from mist.core.tasks import run_script
        from mist.core.helpers import log_event
    else:
        from mist.io.methods import enable_monitoring
        log_event = lambda *args, **kwargs: None

    job_id = job_id or uuid.uuid4().hex

    user = user_from_email(email)
    tmp_log = lambda msg, *args: log.error('Post deploy: %s' % msg, *args)
    tmp_log('Entering post deploy steps for %s %s %s',
            user.email, backend_id, machine_id)
    try:
        # find the node we're looking for and get its hostname
        node = None
        try:
            conn = connect_provider(user.backends[backend_id])
            nodes = conn.list_nodes() # TODO: use cache
            for n in nodes:
                if n.id == machine_id:
                    node = n
                    break
            tmp_log('run list_machines')
        except:
            raise self.retry(exc=Exception(), countdown=10, max_retries=10)

        if node and len(node.public_ips):
            # filter out IPv6 addresses
            ips = filter(lambda ip: ':' not in ip, node.public_ips)
            host = ips[0]
        else:
            tmp_log('ip not found, retrying')
            raise self.retry(exc=Exception(), countdown=60, max_retries=20)

        try:
            from mist.io.shell import Shell
            shell = Shell(host)
            # connect with ssh even if no command, to create association
            # to be able to enable monitoring
            tmp_log('attempting to connect to shell')
            key_id, ssh_user = shell.autoconfigure(
                user, backend_id, node.id, key_id, username, password, port
            )
            tmp_log('connected to shell')
            result = probe_ssh_only(user, backend_id, machine_id, host=None,
                                    key_id=key_id, ssh_user=ssh_user,
                                    shell=shell)
            log_dict = {
                'email': email,
                'event_type': 'job',
                'backend_id': backend_id,
                'machine_id': machine_id,
                'job_id': job_id,
                'host': host,
                'key_id': key_id,
                'ssh_user': ssh_user,
            }

            log_event(action='probe', result=result, **log_dict)
            backend = user.backends[backend_id]
            msg = "Backend:\n  Name: %s\n  Id: %s\n" % (backend.title,
                                                        backend_id)
            msg += "Machine:\n  Name: %s\n  Id: %s\n" % (node.name,
                                                             node.id)

            if hostname:
                try:
                    record = create_dns_a_record(user, hostname, host)
                    hostname = '.'.join((record.name, record.zone.domain))
                    log_event(action='create_dns_a_record', hostname=hostname,
                              **log_dict)
                except Exception as exc:
                    log_event(action='create_dns_a_record', error=str(exc),
                              **log_dict)

            error = False
            if script_id and multi_user:
                tmp_log('will run script_id %s', script_id)
                ret = run_script.run(
                    user.email, script_id, backend_id, machine_id,
                    params=script_params, host=host, job_id=job_id
                )
                error = ret['error']
                tmp_log('executed script_id %s', script_id)
            elif command:
                tmp_log('will run command %s', command)
                log_event(action='deployment_script_started', command=command, **log_dict)
                start_time = time()
                retval, output = shell.command(command)
                tmp_log('executed command %s', command)
                execution_time = time() - start_time
                output = output.decode('utf-8','ignore')
                title = "Deployment script %s" % ('failed' if retval
                                                  else 'succeeded')
                error = retval > 0
                notify_user(user, title,
                            backend_id=backend_id,
                            machine_id=machine_id,
                            machine_name=node.name,
                            command=command,
                            output=output,
                            duration=execution_time,
                            retval=retval,
                            error=retval > 0)
                log_event(action='deployment_script_finished',
                          error=retval > 0,
                          return_value=retval,
                          command=command,
                          stdout=output,
                          **log_dict)

            shell.disconnect()

            if monitoring:
                try:
                    enable_monitoring(user, backend_id, node.id,
                        name=node.name, dns_name=node.extra.get('dns_name',''),
                        public_ips=ips, no_ssh=False, dry=False, job_id=job_id,
                        plugins=plugins, deploy_async=False,
                    )
                except Exception as e:
                    print repr(e)
                    error = True
                    notify_user(user, "Enable monitoring failed for machine %s" % machine_id, repr(e))
                    notify_admin('Enable monitoring on creation failed for user %s machine %s: %r' % (email, machine_id, e))
                    log_event(action='enable_monitoring_failed', error=repr(e),
                              **log_dict)

            if post_script_id and multi_user:
                tmp_log('will run post_script_id %s', post_script_id)
                ret = run_script.run(
                    user.email, post_script_id, backend_id, machine_id,
                    params=post_script_params, host=host, job_id=job_id,
                    action_prefix='post_',
                )
                error = ret['error']
                tmp_log('executed post_script_id %s', post_script_id)

            log_event(action='post_deploy_finished', error=error, **log_dict)

        except (ServiceUnavailableError, SSHException) as exc:
            tmp_log(repr(exc))
            raise self.retry(exc=exc, countdown=60, max_retries=15)
    except Exception as exc:
        tmp_log(repr(exc))
        if str(exc).startswith('Retry'):
            raise
        notify_user(user, "Deployment script failed for machine %s" % machine_id)
        notify_admin("Deployment script failed for machine %s in backend %s by user %s" % (machine_id, backend_id, email), repr(exc))
        log_event(
            email=email,
            event_type='job',
            action='post_deploy_finished',
            backend_id=backend_id,
            machine_id=machine_id,
            enable_monitoring=bool(monitoring),
            command=command,
            error="Couldn't connect to run post deploy steps.",
            job_id=job_id
        )
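
The self.retry(...) calls show that post_deploy_steps runs as the body of a Celery task bound to its own instance, so it can reschedule itself with a countdown and a retry cap while the machine is still booting. A minimal sketch of that mechanism, assuming a hypothetical task and broker URL (this is not the mist.io task definition):

from celery import Celery

app = Celery('sketch', broker='redis://localhost:6379/0')  # hypothetical broker

def lookup_ip(machine_id):
    # hypothetical stand-in for conn.list_nodes() plus the IP filtering above
    return None

@app.task(bind=True)
def wait_for_ip(self, machine_id):
    ip = lookup_ip(machine_id)
    if not ip:
        # re-enqueue this task in 60 seconds, at most 20 times, like the
        # "ip not found, retrying" branch in post_deploy_steps
        raise self.retry(exc=Exception('ip not found'), countdown=60,
                         max_retries=20)
    return ip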
Example #6
def post_deploy_steps(self,
                      email,
                      backend_id,
                      machine_id,
                      monitoring,
                      command,
                      key_id=None,
                      username=None,
                      password=None,
                      port=22):
    from mist.io.methods import ssh_command, connect_provider, enable_monitoring
    from mist.io.methods import notify_user, notify_admin
    if multi_user:
        from mist.core.methods import enable_monitoring
        from mist.core.helpers import log_event
    else:
        from mist.io.methods import enable_monitoring
        log_event = lambda *args, **kwargs: None

    user = user_from_email(email)
    try:

        # find the node we're looking for and get its hostname
        conn = connect_provider(user.backends[backend_id])
        nodes = conn.list_nodes()
        node = None
        for n in nodes:
            if n.id == machine_id:
                node = n
                break

        if node and len(node.public_ips):
            # filter out IPv6 addresses
            ips = filter(lambda ip: ':' not in ip, node.public_ips)
            host = ips[0]
        else:
            raise self.retry(exc=Exception(), countdown=120, max_retries=5)

        try:
            from mist.io.shell import Shell
            shell = Shell(host)
            # connect with ssh even if no command, to create association
            # to be able to enable monitoring
            key_id, ssh_user = shell.autoconfigure(user, backend_id, node.id,
                                                   key_id, username, password,
                                                   port)

            backend = user.backends[backend_id]
            msg = "Backend:\n  Name: %s\n  Id: %s\n" % (backend.title,
                                                        backend_id)
            msg += "Machine:\n  Name: %s\n  Id: %s\n" % (node.name, node.id)
            if command:
                log_dict = {
                    'email': email,
                    'event_type': 'job',
                    'backend_id': backend_id,
                    'machine_id': machine_id,
                    'job_id': uuid.uuid4().hex,
                    'command': command,
                    'host': host,
                    'key_id': key_id,
                    'ssh_user': ssh_user,
                }
                log_event(action='deployment_script_started', **log_dict)
                start_time = time()
                retval, output = shell.command(command)
                execution_time = time() - start_time
                output = output.decode('utf-8', 'ignore')
                title = "Deployment script %s" % ('failed'
                                                  if retval else 'succeeded')
                notify_user(user,
                            title,
                            backend_id=backend_id,
                            machine_id=machine_id,
                            machine_name=node.name,
                            command=command,
                            output=output,
                            duration=execution_time,
                            retval=retval,
                            error=retval > 0)
                log_event(action='deployment_script_finished',
                          error=retval > 0,
                          return_value=retval,
                          stdout=output,
                          **log_dict)

            shell.disconnect()

            if monitoring:
                try:
                    enable_monitoring(
                        user,
                        backend_id,
                        node.id,
                        name=node.name,
                        dns_name=node.extra.get('dns_name', ''),
                        public_ips=ips,
                        no_ssh=False,
                        dry=False,
                    )
                except Exception as e:
                    print repr(e)
                    notify_user(
                        user, "Enable monitoring failed for machine %s (%s)" %
                        (node.name, node.id), repr(e))
                    notify_admin(
                        'Enable monitoring on creation failed for user %s machine %s: %r'
                        % (email, node.name, e))

        except (ServiceUnavailableError, SSHException) as exc:
            raise self.retry(exc=exc, countdown=60, max_retries=5)
    except Exception as exc:
        if str(exc).startswith('Retry'):
            raise
        notify_user(
            user, "Deployment script failed for machine %s after 5 retries" %
            node.id)
        notify_admin(
            "Deployment script failed for machine %s in backend %s by user %s after 5 retries"
            % (node.id, backend_id, email), repr(exc))
        log_event(
            email=email,
            event_type='job',
            action='deployment_script_failed',
            backend_id=backend_id,
            machine_id=machine_id,
            enable_monitoring=bool(monitoring),
            command=command,
            error="Couldn't connect to run post deploy steps (5 attempts).",
        )
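
The deployment-script branch above times the command, decodes its output, and derives success purely from the exit status (retval > 0 means failure). The same measurement done with the standard library instead of mist.io's Shell, as an illustrative sketch only:

import subprocess
from time import time

start_time = time()
proc = subprocess.Popen(['sh', '-c', 'echo deployed'],
                        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
output, _ = proc.communicate()
execution_time = time() - start_time

retval = proc.returncode
output = output.decode('utf-8', 'ignore')
title = "Deployment script %s" % ('failed' if retval else 'succeeded')
print(title, retval, execution_time, output)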
Example #7
def create_machine_async(email,
                         cloud_id,
                         key_id,
                         machine_name,
                         location_id,
                         image_id,
                         size_id,
                         script,
                         image_extra,
                         disk,
                         image_name,
                         size_name,
                         location_name,
                         ips,
                         monitoring,
                         networks,
                         docker_env,
                         docker_command,
                         script_id='',
                         script_params='',
                         post_script_id='',
                         post_script_params='',
                         quantity=1,
                         persist=False,
                         job_id=None,
                         docker_port_bindings={},
                         docker_exposed_ports={},
                         azure_port_bindings='',
                         hostname='',
                         plugins=None,
                         disk_size=None,
                         disk_path=None,
                         cloud_init='',
                         associate_floating_ip=False,
                         associate_floating_ip_subnet=None,
                         project_id=None):
    from multiprocessing.dummy import Pool as ThreadPool
    from mist.io.methods import create_machine
    from mist.io.exceptions import MachineCreationError
    log.warn('MULTICREATE ASYNC %d' % quantity)

    if multi_user:
        from mist.core.helpers import log_event
    else:
        log_event = lambda *args, **kwargs: None
    job_id = job_id or uuid.uuid4().hex

    log_event(email,
              'job',
              'async_machine_creation_started',
              job_id=job_id,
              cloud_id=cloud_id,
              script=script,
              script_id=script_id,
              script_params=script_params,
              monitoring=monitoring,
              persist=persist,
              quantity=quantity)

    THREAD_COUNT = 5
    pool = ThreadPool(THREAD_COUNT)

    names = []
    for i in range(1, quantity + 1):
        names.append('%s-%d' % (machine_name, i))

    user = user_from_email(email)
    specs = []
    for name in names:
        specs.append(
            ((user, cloud_id, key_id, name, location_id, image_id, size_id,
              script, image_extra, disk, image_name, size_name, location_name,
              ips, monitoring, networks, docker_env, docker_command, 22,
              script_id, script_params, job_id), {
                  'hostname': hostname,
                  'plugins': plugins,
                  'post_script_id': post_script_id,
                  'post_script_params': post_script_params,
                  'azure_port_bindings': azure_port_bindings,
                  'associate_floating_ip': associate_floating_ip,
                  'cloud_init': cloud_init,
                  'disk_size': disk_size,
                  'disk_path': disk_path,
                  'project_id': project_id
              }))

    def create_machine_wrapper(args_kwargs):
        args, kwargs = args_kwargs
        error = False
        try:
            node = create_machine(*args, **kwargs)
        except MachineCreationError as exc:
            error = str(exc)
        except Exception as exc:
            error = repr(exc)
        finally:
            name = args[3]
            log_event(email,
                      'job',
                      'machine_creation_finished',
                      job_id=job_id,
                      cloud_id=cloud_id,
                      machine_name=name,
                      error=error)

    pool.map(create_machine_wrapper, specs)
    pool.close()
    pool.join()
Example #8
def post_deploy_steps(self,
                      email,
                      cloud_id,
                      machine_id,
                      monitoring,
                      command,
                      key_id=None,
                      username=None,
                      password=None,
                      port=22,
                      script_id='',
                      script_params='',
                      job_id=None,
                      hostname='',
                      plugins=None,
                      post_script_id='',
                      post_script_params=''):
    from mist.io.methods import connect_provider, probe_ssh_only
    from mist.io.methods import notify_user, notify_admin
    from mist.io.methods import create_dns_a_record
    if multi_user:
        from mist.core.methods import enable_monitoring
        from mist.core.tasks import run_script
        from mist.core.helpers import log_event
    else:
        from mist.io.methods import enable_monitoring
        log_event = lambda *args, **kwargs: None

    job_id = job_id or uuid.uuid4().hex

    user = user_from_email(email)
    tmp_log = lambda msg, *args: log.error('Post deploy: %s' % msg, *args)
    tmp_log('Entering post deploy steps for %s %s %s', user.email, cloud_id,
            machine_id)
    try:
        # find the node we're looking for and get its hostname
        node = None
        try:
            conn = connect_provider(user.clouds[cloud_id])
            nodes = conn.list_nodes()  # TODO: use cache
            for n in nodes:
                if n.id == machine_id:
                    node = n
                    break
            tmp_log('run list_machines')
        except:
            raise self.retry(exc=Exception(), countdown=10, max_retries=10)

        if node and len(node.public_ips):
            # filter out IPv6 addresses
            ips = filter(lambda ip: ':' not in ip, node.public_ips)
            host = ips[0]
        else:
            tmp_log('ip not found, retrying')
            raise self.retry(exc=Exception(), countdown=60, max_retries=20)

        try:
            from mist.io.shell import Shell
            shell = Shell(host)
            # connect with ssh even if no command, to create association
            # to be able to enable monitoring
            tmp_log('attempting to connect to shell')
            key_id, ssh_user = shell.autoconfigure(user, cloud_id, node.id,
                                                   key_id, username, password,
                                                   port)
            tmp_log('connected to shell')
            result = probe_ssh_only(user,
                                    cloud_id,
                                    machine_id,
                                    host=None,
                                    key_id=key_id,
                                    ssh_user=ssh_user,
                                    shell=shell)
            log_dict = {
                'email': email,
                'event_type': 'job',
                'cloud_id': cloud_id,
                'machine_id': machine_id,
                'job_id': job_id,
                'host': host,
                'key_id': key_id,
                'ssh_user': ssh_user,
            }

            log_event(action='probe', result=result, **log_dict)
            cloud = user.clouds[cloud_id]
            msg = "Cloud:\n  Name: %s\n  Id: %s\n" % (cloud.title, cloud_id)
            msg += "Machine:\n  Name: %s\n  Id: %s\n" % (node.name, node.id)

            if hostname:
                try:
                    record = create_dns_a_record(user, hostname, host)
                    hostname = '.'.join((record.name, record.zone.domain))
                    log_event(action='create_dns_a_record',
                              hostname=hostname,
                              **log_dict)
                except Exception as exc:
                    log_event(action='create_dns_a_record',
                              error=str(exc),
                              **log_dict)

            error = False
            if script_id and multi_user:
                tmp_log('will run script_id %s', script_id)
                ret = run_script.run(user.email,
                                     script_id,
                                     cloud_id,
                                     machine_id,
                                     params=script_params,
                                     host=host,
                                     job_id=job_id)
                error = ret['error']
                tmp_log('executed script_id %s', script_id)
            elif command:
                tmp_log('will run command %s', command)
                log_event(action='deployment_script_started',
                          command=command,
                          **log_dict)
                start_time = time()
                retval, output = shell.command(command)
                tmp_log('executed command %s', command)
                execution_time = time() - start_time
                output = output.decode('utf-8', 'ignore')
                title = "Deployment script %s" % ('failed'
                                                  if retval else 'succeeded')
                error = retval > 0
                notify_user(user,
                            title,
                            cloud_id=cloud_id,
                            machine_id=machine_id,
                            machine_name=node.name,
                            command=command,
                            output=output,
                            duration=execution_time,
                            retval=retval,
                            error=retval > 0)
                log_event(action='deployment_script_finished',
                          error=retval > 0,
                          return_value=retval,
                          command=command,
                          stdout=output,
                          **log_dict)

            shell.disconnect()

            if monitoring:
                try:
                    enable_monitoring(
                        user,
                        cloud_id,
                        node.id,
                        name=node.name,
                        dns_name=node.extra.get('dns_name', ''),
                        public_ips=ips,
                        no_ssh=False,
                        dry=False,
                        job_id=job_id,
                        plugins=plugins,
                        deploy_async=False,
                    )
                except Exception as e:
                    print repr(e)
                    error = True
                    notify_user(
                        user,
                        "Enable monitoring failed for machine %s" % machine_id,
                        repr(e))
                    notify_admin(
                        'Enable monitoring on creation failed for user %s machine %s: %r'
                        % (email, machine_id, e))
                    log_event(action='enable_monitoring_failed',
                              error=repr(e),
                              **log_dict)

            if post_script_id and multi_user:
                tmp_log('will run post_script_id %s', post_script_id)
                ret = run_script.run(
                    user.email,
                    post_script_id,
                    cloud_id,
                    machine_id,
                    params=post_script_params,
                    host=host,
                    job_id=job_id,
                    action_prefix='post_',
                )
                error = ret['error']
                tmp_log('executed post_script_id %s', post_script_id)

            log_event(action='post_deploy_finished', error=error, **log_dict)

        except (ServiceUnavailableError, SSHException) as exc:
            tmp_log(repr(exc))
            raise self.retry(exc=exc, countdown=60, max_retries=15)
    except Exception as exc:
        tmp_log(repr(exc))
        if str(exc).startswith('Retry'):
            raise
        notify_user(user,
                    "Deployment script failed for machine %s" % machine_id)
        notify_admin(
            "Deployment script failed for machine %s in cloud %s by user %s" %
            (machine_id, cloud_id, email), repr(exc))
        log_event(email=email,
                  event_type='job',
                  action='post_deploy_finished',
                  cloud_id=cloud_id,
                  machine_id=machine_id,
                  enable_monitoring=bool(monitoring),
                  command=command,
                  error="Couldn't connect to run post deploy steps.",
                  job_id=job_id)
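
One detail worth noting in the IP handling above: indexing the result of filter(...) with ips[0] relies on Python 2, where filter returns a list; on Python 3 it returns an iterator and the indexing would raise a TypeError. A list comprehension is equivalent and works on both, sketched here with made-up addresses:

public_ips = ['2001:db8::1', '198.51.100.7']        # hypothetical node IPs
ips = [ip for ip in public_ips if ':' not in ip]    # drop IPv6 addresses
host = ips[0] if ips else None
print(host)  # 198.51.100.7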
Example #9
def create_machine_async(email, cloud_id, key_id, machine_name, location_id,
                         image_id, size_id, script, image_extra, disk,
                         image_name, size_name, location_name, ips, monitoring,
                         networks, docker_env, docker_command,
                         script_id='', script_params='',
                         post_script_id='', post_script_params='',
                         quantity=1, persist=False, job_id=None,
                         docker_port_bindings={}, docker_exposed_ports={},
                         azure_port_bindings='', hostname='', plugins=None,
                         disk_size=None, disk_path=None,
                         cloud_init='', associate_floating_ip=False,
                         associate_floating_ip_subnet=None, project_id=None,
                         bare_metal=False, hourly=True,
                         cronjob={}):
    from multiprocessing.dummy import Pool as ThreadPool
    from mist.io.methods import create_machine
    from mist.io.exceptions import MachineCreationError
    log.warn('MULTICREATE ASYNC %d' % quantity)

    if multi_user:
        from mist.core.helpers import log_event
    else:
        log_event = lambda *args, **kwargs: None
    job_id = job_id or uuid.uuid4().hex

    if quantity == 1:
        names = [machine_name]
    else:
        names = []
        for i in range(1, quantity+1):
            names.append('%s-%d' % (machine_name, i))

    log_event(email, 'job', 'async_machine_creation_started', job_id=job_id,
              cloud_id=cloud_id, script=script, script_id=script_id,
              script_params=script_params, monitoring=monitoring,
              persist=persist, quantity=quantity, key_id=key_id,
              machine_names=names)

    THREAD_COUNT = 5
    pool = ThreadPool(THREAD_COUNT)

    user = user_from_email(email)
    specs = []
    for name in names:
        specs.append((
            (user, cloud_id, key_id, name, location_id, image_id,
             size_id, script, image_extra, disk, image_name, size_name,
             location_name, ips, monitoring, networks, docker_env,
             docker_command, 22, script_id, script_params, job_id),
            {'hostname': hostname, 'plugins': plugins,
             'post_script_id': post_script_id,
             'post_script_params': post_script_params,
             'azure_port_bindings': azure_port_bindings,
             'associate_floating_ip': associate_floating_ip,
             'cloud_init': cloud_init,
             'disk_size': disk_size,
             'disk_path': disk_path,
             'project_id': project_id,
             'cronjob': cronjob}
        ))

    def create_machine_wrapper(args_kwargs):
        args, kwargs = args_kwargs
        error = False
        node = {}
        try:
            node = create_machine(*args, **kwargs)
        except MachineCreationError as exc:
            error = str(exc)
        except Exception as exc:
            error = repr(exc)
        finally:
            name = args[3]
            log_event(email, 'job', 'machine_creation_finished', job_id=job_id,
                      cloud_id=cloud_id, machine_name=name, error=error,
                      machine_id=node.get('id', ''))

    pool.map(create_machine_wrapper, specs)
    pool.close()
    pool.join()
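
Compared with the earlier variant, this one keeps the bare machine name when quantity is 1 and only adds -1..-N suffixes for batches, and it logs the created machine's id via node.get('id', ''), which assumes create_machine returns a dict-like result. The naming rule, shown with made-up inputs:

machine_name, quantity = 'web', 3                   # hypothetical inputs
if quantity == 1:
    names = [machine_name]
else:
    names = ['%s-%d' % (machine_name, i) for i in range(1, quantity + 1)]
print(names)  # ['web-1', 'web-2', 'web-3']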
Example #10
def post_deploy_steps(self, email, backend_id, machine_id, monitoring, command,
                      key_id=None, username=None, password=None, port=22):
    from mist.io.methods import ssh_command, connect_provider, enable_monitoring
    from mist.io.methods import notify_user, notify_admin
    if multi_user:
        from mist.core.methods import enable_monitoring
        from mist.core.helpers import log_event
    else:
        from mist.io.methods import enable_monitoring
        log_event = lambda *args, **kwargs: None

    user = user_from_email(email)
    try:

        # find the node we're looking for and get its hostname
        conn = connect_provider(user.backends[backend_id])
        nodes = conn.list_nodes()
        node = None
        for n in nodes:
            if n.id == machine_id:
                node = n
                break

        if node and len(node.public_ips):
            # filter out IPv6 addresses
            ips = filter(lambda ip: ':' not in ip, node.public_ips)
            host = ips[0]
        else:
            raise self.retry(exc=Exception(), countdown=120, max_retries=5)

        try:
            from mist.io.shell import Shell
            shell = Shell(host)
            # connect with ssh even if no command, to create association
            # to be able to enable monitoring
            key_id, ssh_user = shell.autoconfigure(
                user, backend_id, node.id, key_id, username, password, port
            )

            backend = user.backends[backend_id]
            msg = "Backend:\n  Name: %s\n  Id: %s\n" % (backend.title,
                                                        backend_id)
            msg += "Machine:\n  Name: %s\n  Id: %s\n" % (node.name,
                                                             node.id)
            if command:
                log_dict = {
                    'email': email,
                    'event_type': 'job',
                    'backend_id': backend_id,
                    'machine_id': machine_id,
                    'job_id': uuid.uuid4().hex,
                    'command': command,
                    'host': host,
                    'key_id': key_id,
                    'ssh_user': ssh_user,
                }
                log_event(action='deployment_script_started', **log_dict)
                start_time = time()
                retval, output = shell.command(command)
                execution_time = time() - start_time
                output = output.decode('utf-8','ignore')
                title = "Deployment script %s" % ('failed' if retval
                                                  else 'succeeded')
                notify_user(user, title,
                            backend_id=backend_id,
                            machine_id=machine_id,
                            machine_name=node.name,
                            command=command,
                            output=output,
                            duration=execution_time,
                            retval=retval,
                            error=retval > 0)
                log_event(action='deployment_script_finished',
                          error=retval > 0,
                          return_value=retval,
                          stdout=output,
                          **log_dict)

            shell.disconnect()

            if monitoring:
                try:
                    enable_monitoring(user, backend_id, node.id,
                        name=node.name, dns_name=node.extra.get('dns_name',''),
                        public_ips=ips, no_ssh=False, dry=False,
                    )
                except Exception as e:
                    print repr(e)
                    notify_user(user, "Enable monitoring failed for machine %s (%s)" % (node.name, node.id), repr(e))
                    notify_admin('Enable monitoring on creation failed for user %s machine %s: %r' % (email, node.name, e))

        except (ServiceUnavailableError, SSHException) as exc:
            raise self.retry(exc=exc, countdown=60, max_retries=5)
    except Exception as exc:
        if str(exc).startswith('Retry'):
            raise
        notify_user(user, "Deployment script failed for machine %s after 5 retries" % node.id)
        notify_admin("Deployment script failed for machine %s in backend %s by user %s after 5 retries" % (node.id, backend_id, email), repr(exc))
        log_event(
            email=email,
            event_type='job',
            action='deployment_script_failed',
            backend_id=backend_id,
            machine_id=machine_id,
            enable_monitoring=bool(monitoring),
            command=command,
            error="Couldn't connect to run post deploy steps (5 attempts).",
        )