Exemplo n.º 1
0
def install(alsi=None):
    """Register this host as a core server node in the datastore.

    The node registration is keyed by the MAC address that belongs to this
    host's IP. When a registration already exists nothing is written;
    otherwise a new one is built from ``DEFAULT_CORE_REGISTRATION``, filled
    with the host's identity and saved with the ``dispatcher`` and
    ``middleman`` roles present.

    Args:
        alsi: Installer object whose ``info(message)`` method is used for
            progress output. Despite the ``None`` default it has to be
            provided, because ``alsi.info`` is called on every path.
    """
    from assemblyline.al.common import forge
    ds = forge.get_datastore()

    ip = net.get_hostip()
    mac = net.get_mac_for_ip(ip)

    if ds.get_node(mac):
        alsi.info("Registration already exist. Skipping...")
        return

    reg = DEFAULT_CORE_REGISTRATION.copy()
    reg['hostname'] = net.get_hostname()
    reg['ip'] = ip
    reg['mac_address'] = mac
    reg['machine_info'] = sysinfo.get_machine_info()
    reg['platform'] = sysinfo.get_platform()

    # dict.copy() is shallow: appending to reg['roles'] directly would also
    # grow the list owned by DEFAULT_CORE_REGISTRATION. Work on a fresh list.
    roles = list(reg.get('roles', []))
    for role in ("dispatcher", "middleman"):
        if role not in roles:
            roles.append(role)
    reg['roles'] = roles

    ds.save_node(mac, reg)
    alsi.info("Core server registered!")
Exemplo n.º 2
0
def start():
    """Probe the disks behind the first scanned device and report failures.

    Makes sure ``smartctl`` can be launched (an ``apt-get`` install of
    smartmontools is attempted when launching it raises ``OSError``), takes
    the first device listed by ``smartctl --scan`` and queries the
    ``megaraid`` disk slots 0 to 15 behind it. The result of every slot is
    logged, and when at least one slot is not reported as OK by
    ``is_drive_ok`` a ``MT_HARDDRIVE_FAILURES`` message is published on the
    ``status`` queue.
    """
    # Launching the binary once is the availability probe.
    try:
        subprocess.call(['smartctl'],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
    except OSError:
        install_cmd = ["sudo", "apt-get", "-y", "install", "smartmontools"]
        install_rc = subprocess.call(install_cmd,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
        if install_rc != 0:
            exit("Can't install smartmontools, stopping...")

    # Discover drives; only the first token of the scan output is used.
    scan = subprocess.Popen(['smartctl', '--scan'],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    scan_out = scan.communicate()[0]
    if not scan_out:
        return

    device = scan_out.split(" ")[0]
    failed = []

    for slot in xrange(16):
        query = subprocess.Popen(
            ['smartctl', '-H', '-d', 'megaraid,%s' % slot, device],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        report = query.communicate()[0]

        # Presumably smartctl prints this once the slot number runs past the
        # last disk; either way probing stops here.
        if "INQUIRY failed" in report:
            break

        if is_drive_ok(report):
            log.info("All is good with drive: %s [disk: %s]" % (device, slot))
            continue

        failed.append((device, slot))
        log.error(
            "Device %s [disk: %s] has a failure state. Report to your administrator."
            % (device, slot))

    if not failed:
        return

    # Broadcast the list of failing slots.
    body = {
        'hostname': get_hostname(),
        'ip': get_hostip(),
        'bad_disks': [slot for _, slot in failed],
        'device': device
    }
    failure_msg = message.Message(to="*",
                                  sender='harddrive_monitor',
                                  mtype=message.MT_HARDDRIVE_FAILURES,
                                  body=body).as_dict()
    queue.CommsQueue('status').publish(failure_msg)
Exemplo n.º 3
0
def get_mac_for_serviceinstance(servicename, seperator=':'):
    """Derive a deterministic MAC address string for a service instance.

    The six octets are ``0x52``, ``0x54``, the last octet of this host's IP
    address, the first two bytes of the MD5 digest of the service name, and
    the instance number.

    Args:
        servicename: String of the form ``Name.Instance`` where ``Instance``
            parses as an integer.
        seperator: String placed between the octets (parameter spelling kept
            for caller compatibility).

    Returns:
        The octets formatted as two-digit hex, joined with ``seperator`` and
        upper-cased.

    Raises:
        ValueError: If ``servicename`` does not split into exactly two parts
            on ``'.'`` or the instance part is not an integer.
    """
    hostip = net.get_hostip()
    hostbyte = int(hostip.split('.')[-1])
    name, instance = servicename.split('.')

    # hashlib only accepts bytes on Python 3, so text names are encoded
    # first. On Python 2 a plain ``str`` already is ``bytes`` and is hashed
    # unchanged, which keeps the generated addresses stable.
    if not isinstance(name, bytes):
        name = name.encode('utf-8')
    md5hex = hashlib.md5(name).hexdigest()

    service_byte1 = int(md5hex[:2], 16)
    service_byte2 = int(md5hex[2:4], 16)
    mac = [0x52, 0x54, hostbyte, service_byte1, service_byte2, int(instance)]
    return seperator.join("%02x" % x for x in mac).upper()
Exemplo n.º 4
0
def get_machine_info(is_agent=False):
    """Collect a summary of the local machine.

    Args:
        is_agent: Selects the ``uid`` prefix: ``"Agent-"`` when true,
            ``"Core-"`` otherwise.

    Returns:
        A dict with the keys ``cores`` (number of per-CPU readings returned
        by psutil), ``memory`` (total memory divided by 1024**3, formatted
        with one decimal, as a string), ``os``, ``ip``, ``uid`` (prefix plus
        the MAC address) and ``name`` (hostname).
    """
    import psutil

    # psutil replaced phymem_usage() with virtual_memory(); prefer the
    # current API and fall back so older psutil installs keep working.
    read_memory = getattr(psutil, 'virtual_memory', None) or psutil.phymem_usage

    out = {
        'cores': len(psutil.cpu_percent(interval=0.01, percpu=True)),
        'memory': "%.1f" % (float(read_memory().total) / 1024 / 1024 / 1024),
        'os': get_osinfo(),
        'ip': net.get_hostip(),
    }

    uid_prefix = "Agent-" if is_agent else "Core-"
    out['uid'] = uid_prefix + net.get_mac_address()
    out['name'] = net.get_hostname()

    return out
Exemplo n.º 5
0
def main():
    """Schedule the periodic metric collectors and start the scheduler.

    Reads the log server node and Elasticsearch port from ``config``; when
    either is missing an error is logged and the process exits with status
    1. Otherwise ``calculate_system_metrics`` is scheduled every 60 seconds,
    plus ``calculate_solr_metrics`` and ``calculate_riak_metrics`` when
    ``is_riak`` is true for this host, and the scheduler is started
    non-daemonic.
    """
    global previous_disk_io, previous_net_io

    logserver_cfg = config.get('logging', {}).get('logserver', {})
    elastic_ip = logserver_cfg.get('node', None)
    elastic_port = logserver_cfg.get('elastic', {}).get('port', 9200)

    if not (elastic_ip and elastic_port):
        log.error(
            "Elasticsearch cluster not configured in the seed. There is no need to gather stats on this box."
        )
        sys.exit(1)

    scheduler = Scheduler()
    cur_host = get_hostname()
    cur_ip = get_hostip()
    es = elasticsearch.Elasticsearch([{'host': elastic_ip,
                                       'port': elastic_port}])

    def schedule(collector):
        # Every collector gets its own kwargs dict and the same interval.
        scheduler.add_interval_job(collector,
                                   seconds=60,
                                   kwargs={
                                       "es": es,
                                       "cur_ip": cur_ip,
                                       "cur_host": cur_host
                                   })

    schedule(calculate_system_metrics)

    if is_riak(cur_ip, cur_host):
        schedule(calculate_solr_metrics)
        schedule(calculate_riak_metrics)

    scheduler.daemonic = False
    scheduler.start()
Exemplo n.º 6
0
    def __init__(
            self,
            service_manager,  #pylint: disable=R0913
            control_queue=None,
            debug=False,
            high=config.core.dispatcher.max.inflight /
        config.core.dispatcher.shards,
            pop=forge.get_dispatch_queue().pop,
            shard='0'):
        """Set up the in-memory state of one dispatcher shard.

        Args:
            service_manager: Stored on the instance; its ``services``
                attribute is iterated for the start-up log line.
            control_queue: Control queue to use. When falsy, the queue
                ``'control-queue-' + shard`` is obtained from ``forge``.
            debug: When truthy ``self.debug`` is ``log.info``; otherwise it
                is a function that ignores its arguments.
            high: Stored as ``self.high``. The default is computed once, at
                function definition time, from the dispatcher configuration.
            pop: Callable stored as ``self.pop``. The default is the ``pop``
                method of ``forge.get_dispatch_queue()``, also bound at
                definition time.
            shard: String suffix used in the queue names built below.
        """
        if debug:
            self.debug = log.info
        else:
            self.debug = lambda *msg: None

        # NOTE(review): the 'ip:' key carries a trailing colon. It looks like
        # a typo, but confirm what consumers of hostinfo expect before
        # renaming it.
        self.hostinfo = {
            'ip:': get_hostip(),
            'mac_address': get_mac_address(),
            'host': get_hostname(),
        }

        self.ack_timeout = {}
        self.child_timeout = {}
        self.completed = {}
        self.control_queue = control_queue or \
            forge.get_control_queue('control-queue-' + shard)
        self.drain = False
        self.entries = {}
        self.errors = {}
        self.high = high
        self.ingest_queue = 'ingest-queue-' + shard
        self.last_check = 0
        self.lock = threading.Lock()
        self.pop = pop
        self.queue_size = {}
        # Response queues are named: <hostname>-<pid>-<seconds>-<shard>.
        self.response_queue = '-'.join((socket.gethostname(), str(os.getpid()),
                                        str(int(time.time())), shard))
        self.results = {}
        self.running = False
        self.score = {}
        self.service_manager = service_manager
        self.service_timeout = {}
        self.shard = shard
        self.storage_queue = LocalQueue()
        self.watchers = {}

        log.info('Dispatcher started. Dispatching to services:{0}'.format(
            [s for s in service_manager.services]))
Exemplo n.º 7
0
 def __init__(self, vmcfg=None):
     """Prepare the VM manager state for this host.

     Creates the configured VM disk root directory when it does not exist
     and records the host's IP and MAC, a datastore handle, a service agent
     client and empty bookkeeping containers.

     Args:
         vmcfg: Virtual machine configuration. When ``None`` it is loaded
             with ``get_vmcfg_for_localhost()``.
     """
     self.disk_root = config.workers.virtualmachines.disk_root
     if not os.path.exists(self.disk_root):
         os.makedirs(self.disk_root)
     self.vmm = None
     self.cfg = vmcfg
     if vmcfg is None:
         self.cfg = get_vmcfg_for_localhost()
     self.vmrevert_scheduler = None
     self.host_ip = net.get_hostip()
     self.host_mac = net.get_mac_for_ip(self.host_ip)
     self.log = logging.getLogger('assemblyline.vmm')
     self.vm_profiles = {}
     self.vmm_lock = threading.Lock()
     self._state = State.INIT
     # NOTE(review): ``async`` is a reserved keyword from Python 3.7 on, so
     # this keyword argument only parses on older interpreters.
     self._hostagent_client = ServiceAgentClient(async=True)
     self.store = forge.get_datastore()
     self.vm_configs = {}
Exemplo n.º 8
0
    def start(self):
        """Run the dispatcher until it is stopped or fully drained.

        Starts the metrics counters, the service manager, one heartbeat
        thread and eight writer threads, installs the SIGINT handler, waits
        two update intervals and then polls in a loop. The loop ends when
        ``self.running`` becomes false, or when ``self.drain`` is set and no
        entries remain. The counters are stopped on the way out.
        """
        global counts  # pylint: disable=W0603

        # Publish counters to the metrics sink.
        counts = counter.AutoExportingCounters(
            name='dispatcher-%s' % self.shard,
            host=net.get_hostip(),
            auto_flush=True,
            auto_log=False,
            export_interval_secs=config.system.update_interval,
            channel=forge.get_metrics_sink(),
            counter_type='dispatcher')
        counts.start()

        self.service_manager.start()

        # The loop below polls for messages with an exponential backoff when
        # no messages are found. The sleep is 2**exp seconds, so it grows
        # from 2**-6 up to a maximum of one second (2**0).
        minimum = -6
        maximum = 0
        self.running = True

        threading.Thread(target=self.heartbeat).start()
        for _ in range(8):
            threading.Thread(target=self.writer).start()

        signal.signal(signal.SIGINT, self.interrupt)

        time.sleep(2 * int(config.system.update_interval))

        exp = minimum
        while self.running:
            # A truthy poll result resets the backoff and polls again at once.
            if self.poll(len(self.entries)):
                exp = minimum
                continue
            # Draining and nothing left in flight: leave the loop.
            if self.drain and not self.entries:
                break
            time.sleep(2**exp)
            # Grow the exponent by one per idle round, capped at ``maximum``.
            exp = exp + 1 if exp < maximum else exp
            self.check_timeouts()

        counts.stop()
Exemplo n.º 9
0
    def __init__(self):
        """Initialise the host agent's identity, state and RPC dispatch table.

        Resolves the host IP and the MAC address for it, obtains a datastore
        handle, logs a start-up line, resets the manager and thread
        references to ``None`` and maps every supported ``AgentRequest`` type
        to the bound method that handles it.
        """
        self.ip = net.get_hostip()
        self.mac = net.get_mac_for_ip(self.ip)
        self.store = forge.get_datastore()
        self.log = logging.getLogger('assemblyline.agent')
        self.log.info('Starting HostAgent: MAC[%s] STORE[%s]' %
                      (self.mac, self.store))

        # This host's registration from riak (Hosts tab in UI).
        self.registration = None
        self.service_manager = None
        self.vm_manager = None
        self.flex_manager = None
        self.lock = None
        self.consumer_thread = None
        # Stays False while the rest of the state is set up; it is switched
        # to True on the last line of this method.
        self._should_run = False
        self.host_profile = {}
        self.executor_thread = None

        # Chores are actions that we run periodically and which we coalesce
        # when the same chore is requested multiple times in the same tick.
        # Jobs are executed as they are received.
        self.jobs = LocalQueue()
        self.last_heartbeat = 0
        # Request type -> handler method.
        self.rpc_handlers = {
            AgentRequest.PING: self.ping,
            AgentRequest.DRAIN: self.drain,
            AgentRequest.UNDRAIN: self.undrain,
            AgentRequest.SHUTDOWN: self.shutdown,
            AgentRequest.VM_LIST: self.list_vms,
            AgentRequest.VM_START: self.start_vm,
            AgentRequest.VM_STOP: self.stop_vm,
            AgentRequest.VM_STOP_ALL: self.stop_all_vms,
            AgentRequest.VM_RESTART: self.restart_vm,
            AgentRequest.VM_REFRESH_ALL: self.refresh_vm_all,
            AgentRequest.VM_REFRESH_FLEET: self.refresh_vm_fleet,
            AgentRequest.VM_GET_REVERT_TIMES: self.vm_get_revert_times,
            AgentRequest.START_SERVICES: self.start_services,
            AgentRequest.STOP_SERVICES: self.stop_services,
        }

        self._should_run = True
Exemplo n.º 10
0
    def __init__(self):
        """Initialise the flex manager's bookkeeping state.

        Obtains a datastore handle, resolves this host's MAC address and
        creates the ``safe-start-<mac>`` named queue. The remaining
        attributes start out empty or ``None``.
        """
        # Delay these imports so most nodes don't import them.
        # The ``global`` statement makes the imported name a module-level
        # one, so it is visible outside this method once it has run.
        global Scheduler
        from apscheduler.scheduler import Scheduler

        self.bottleneck_queue_sizes = {}
        self.cores = None
        self.datastore = forge.get_datastore()
        self.flex_profile = None
        self.flex_scheduler = None
        self.log = logging.getLogger('assemblyline.flex')
        self.mac = net.get_mac_for_ip(net.get_hostip())
        self.main_bottleneck = ''
        self.needs_cleanup = True
        self.previous_queue_sizes = {}
        self.safe_start_dict = {}
        # The queue name is specific to this host's MAC address.
        self.safeq = NamedQueue('safe-start-%s' % self.mac)
        self.service_manager = None
        self.ram_mb = None
        self.tick_count = 0
        self.vm_manager = None
Exemplo n.º 11
0
def get_vmcfg_for_localhost():
    """Return the virtual machine section of this host's profile.

    The lookup chain is: host IP -> MAC address -> node registration ->
    profile name -> host profile -> ``'virtual_machines'`` entry.

    Returns:
        The value stored under ``'virtual_machines'`` in the host profile.

    Raises:
        ConfigException: If the registration, its profile name, the profile
            itself or its virtual machine section is missing or empty.
    """
    ip = net.get_hostip()
    mac = net.get_mac_for_ip(ip)
    store = forge.get_datastore()
    try:
        host_registration = store.get_node(mac)
        if not host_registration:
            raise ConfigException('Could not find host registration fr %s' % mac)

        profile_name = host_registration.get('profile', None)
        if not profile_name:
            raise ConfigException('Could not find profile for host: %s' % mac)

        host_profile = store.get_profile(profile_name)
        if not host_profile:
            raise ConfigException('Could not fetch host profile %s' % profile_name)

        vm_config = host_profile.get('virtual_machines', None)
        if not vm_config:
            raise ConfigException('Could not find virtual machine section in %s' % profile_name)
        return vm_config
    finally:
        # Close the datastore client on the error paths as well, not only
        # after a successful lookup.
        store.client.close()
Exemplo n.º 12
0
def _extract_transport_extras(query_params, str_keys=(), bool_keys=()):
    """Pick the allowed extra transport parameters out of a parsed query.

    Args:
        query_params: Mapping of parameter name to a list of values, as
            returned by ``parse_qs``. Only the first value is considered.
        str_keys: Names whose first value is passed through unchanged.
        bool_keys: Names whose first value must be ``true`` or ``false``
            (case-insensitive); any other value is dropped.

    Returns:
        A dict holding only the recognised parameters.
    """
    extras = {}
    for key, values in query_params.items():
        if key in bool_keys:
            flag = values[0].lower()
            if flag == 'true':
                extras[key] = True
            elif flag == 'false':
                extras[key] = False
        if key in str_keys:
            extras[key] = values[0]
    return extras


def create_transport(url):
    """
    Build the transport object described by a URL.

    Transports are initiated using a URL. They follow the normal URL format:
    ftp://user:pass@host.com/path/to/file

    In this example, it will extract the following parameters:
    scheme: ftp
    host: host.com
    user: user
    password: pass

    Certain transports can have extra parameters; those parameters need to be specified in the query part of the URL.
    e.g.: sftp://host.com/path/to/file?private_key=/etc/ssl/pkey&private_key_pass=pass&validate_host=true
    scheme: sftp
    host: host.com
    user:
    password:
    private_key: /etc/ssl/pkey
    private_key_pass: pass
    validate_host: True

    NOTE: For transports with extra parameters, only specific extra parameters are allowed. This is the list of extra
          parameters allowed:

          ftp: None
          http: pki (string)
          sftp: private_key (string), private_key_pass (string), validate_host (bool)
          file: None

    An ftp, sftp or http URL whose host equals this machine's hostname or IP is served by the local file
    transport instead.

    Raises:
        FileStoreException: If the scheme is none of ftp, sftp, http or file.
    """

    parsed = urlparse(url)

    base = parsed.path or '/'
    host = parsed.hostname
    password = unquote(parsed.password) if parsed.password else ''
    user = parsed.username or ''

    scheme = parsed.scheme.lower()
    # Remote schemes that point back at this very machine are read locally.
    if scheme in ('ftp', 'sftp', 'http') and (host == get_hostname() or host == get_hostip()):
        scheme = 'file'

    if scheme == 'ftp':
        from assemblyline.al.common.transport.ftp import TransportFTP
        base = base.replace(config.filestore.ftp_root, "")
        t = TransportFTP(base=base, host=host, password=password, user=user)
    elif scheme == 'sftp':
        from assemblyline.al.common.transport.sftp import TransportSFTP

        extras = _extract_transport_extras(
            parse_qs(parsed.query),
            str_keys=('private_key', 'private_key_pass'),
            bool_keys=('validate_host',))
        t = TransportSFTP(base=base,
                          host=host,
                          password=password,
                          user=user,
                          **extras)
    elif scheme == 'http':
        from assemblyline.al.common.transport.http import TransportHTTP

        extras = _extract_transport_extras(parse_qs(parsed.query),
                                           str_keys=('pki',))
        t = TransportHTTP(base=base,
                          host=host,
                          password=password,
                          user=user,
                          **extras)
    elif scheme == 'file':
        t = TransportLocal(base=base)
    else:
        raise FileStoreException("Unknown transport: %s" % scheme)

    return t
Exemplo n.º 13
0
        shard = arg

# Globals
# Module-level settings, queues and caches. Several queue names and the
# duplicate prefix embed ``shard``, so they differ per shard.
alertq = queue.NamedQueue('m-alert', **persistent)  # df line queue
cache = {}
cache_lock = RLock()
chunk_size = 1000
completeq_name = 'm-complete-' + shard
date_fmt = '%Y-%m-%dT%H:%M:%SZ'
default_prefix = config.core.middleman.default_prefix
dup_prefix = 'w-' + shard + '-'
dupq = queue.MultiQueue(**persistent)  # df line queue
expire_after_seconds = config.core.middleman.expire_after
get_whitelist_verdict = forge.get_get_whitelist_verdict()
# NOTE(review): the 'ip:' key carries a trailing colon. It looks like a typo,
# but confirm what consumers of hostinfo expect before renaming it.
hostinfo = {
    'ip:': get_hostip(),
    'mac_address': get_mac_address(),
    'host': get_hostname(),
}
ingestq_name = 'm-ingest-' + shard
is_low_priority = forge.get_is_low_priority()
max_priority = config.submissions.max.priority
max_retries = 10
max_time = 2 * 24 * 60 * 60  # Wait 2 days for responses.
# NOTE(review): ``/`` between two ints is integer division on Python 2 only;
# on Python 3 this expression yields a float.
max_waiting = int(config.core.dispatcher.max.inflight) / (2 * shards)
min_priority = 1
priority_value = constants.PRIORITIES
retry_delay = 180
retryq = queue.NamedQueue('m-retry-' + shard, **persistent)  # df line queue
running = True
sampling = False
Exemplo n.º 14
0
# Host identification is best-effort: when the assemblyline network helpers
# cannot be imported, or fail, the placeholder values below are used so that
# the log formats can still be built.
# noinspection PyBroadException
try:
    from assemblyline.common.net import get_hostname
    hostname = get_hostname()
except Exception:  # pylint:disable=W0702
    hostname = 'unknownhost'

# noinspection PyBroadException
try:
    from assemblyline.common.net import get_hostip
    ip = get_hostip()
except Exception:  # pylint:disable=W0702
    ip = 'x.x.x.x'

# The %(...)s fields are left for the logging module to fill in; only the
# host's IP and name are substituted here.
AL_SYSLOG_FORMAT = '{ip} AL %(levelname)8s %(process)5d %(name)20s | %(message)s'.format(ip=ip)
AL_LOG_FORMAT = '%(asctime)-16s %(levelname)8s {hostname} %(process)d %(name)30s | %(message)s'.format(
    hostname=hostname)
# %(message)s is emitted without surrounding quotes in the JSON layout.
AL_JSON_FORMAT = ''.join((
    '{',
    '"@timestamp": "%(asctime)s", ',
    '"event": { "module": "assemblyline", "dataset": "%(name)s" }, ',
    '"host": {{ "ip": "{ip}", "hostname": "{hostname}" }}, '.format(ip=ip, hostname=hostname),
    '"log": { "level": "%(levelname)s", "logger": "%(name)s" }, ',
    '"process": { "pid": "%(process)d" }, ',
    '"message": %(message)s}',
))
Exemplo n.º 15
0
            self.stop()
        except RemoteShutdownInterrupt as ri:
            msg = 'Shutting down due to remote command: %s' % ri
            self.log.info(msg)
            self.stop()
        except Exception as ex:
            msg = 'Shutting down due to unhandled exception: %s' % get_stacktrace_info(ex)
            self.log.error(msg)
            self.stop()


class AgentClient(object):
    """Client that sends RPC requests to an agent identified by MAC address.

    NOTE(review): the ``async`` parameter and attribute names are reserved
    keywords from Python 3.7 on, so this class only parses on older
    interpreters. Renaming them would change the keyword callers pass.
    """

    def __init__(self, async=False, sender=None):
        """ If sender is not specified the local MAC is used """
        self.sender = sender or net.get_mac_for_ip(net.get_hostip())
        self.async = async

    def _send_agent_rpc(self, mac, command, args=None):
        # Wrap the command in a ControllerRequest addressed to ``mac`` and
        # hand it to send_rpc_comms_queue together with the async flag.
        result = send_rpc_comms_queue(ControllerRequest(
            to=mac, mtype=command, body=args,
            sender=self.sender), async=self.async)

        # Synchronous mode: return the reply body, or the string 'timeout'
        # when the result is falsy. Async mode: return the result as is.
        if not self.async:
            if result:
                return result.body
            return 'timeout'
        else:
            return result

Exemplo n.º 16
0
# Module-level settings, datastore handle and queues.
commandq_name = 'a-command'
create_alert = forge.get_create_alert()
datastore = forge.get_datastore()
exit_msgs = ['server closed the connection unexpectedly']
# 3 * 60 * 60 = 10800; presumably seconds (three hours). Confirm at the
# point of use.
interval = 3 * 60 * 60
logger = logging.getLogger('assemblyline.alerter')
max_consecutive_errors = 100
max_retries = 10
# Cleared by interrupt() when SIGINT is received.
running = True

alertq = queue.NamedQueue(alertq_name, **persistent_settings)
commandq = queue.NamedQueue(commandq_name, **persistent_settings)

# Publish counters to the metrics sink.
# The counters are started at import time, as a module-level side effect.
counts = counter.AutoExportingCounters(name='alerter',
                                       host=net.get_hostip(),
                                       export_interval_secs=5,
                                       channel=forge.get_metrics_sink(),
                                       auto_log=True,
                                       auto_flush=True)
counts.start()


# noinspection PyUnusedLocal
def interrupt(unused1, unused2):  # pylint:disable=W0613
    """Signal handler: log the event and clear the module-level ``running`` flag.

    Both positional arguments are accepted to fit the signal handler
    signature and are ignored.
    """
    global running  # pylint:disable=W0603
    logger.info("Caught signal. Coming down...")
    running = False


signal.signal(signal.SIGINT, interrupt)