Exemplo n.º 1
0
def get_resource_usage(raw_csv_usage: str, value_count: int,
                       interval_sec: int) -> List[ResourceUsage]:
    """Parse a pmrep CSV snapshot into per-workload ResourceUsage records.

    :param raw_csv_usage: raw CSV text as emitted by pmrep (one 'Time'
                          column plus one column per workload/metric)
    :param value_count: number of samples each series must contain;
                        shorter series are padded by pad_usage
    :param interval_sec: seconds between consecutive samples
    :return: one ResourceUsage per non-Time column
    """
    # Lazy %-style args avoid formatting the (potentially large) CSV
    # payloads when debug logging is disabled.
    log.debug("raw: %s", raw_csv_usage)

    parsed = parse_usage_csv(raw_csv_usage)
    log.debug("parsed: %s", parsed)

    # Pad each series so every metric has exactly value_count samples.
    padded = pad_usage(parsed, value_count)
    log.debug("padded: %s", padded)

    TIME = 'Time'
    # The last timestamp in the CSV marks the end of the sampling window.
    end_time = datetime.strptime(padded[TIME][-1], "%Y-%m-%d %H:%M:%S")
    end_time = pytz.utc.localize(end_time)
    end_time_epoch = end_time.timestamp()
    # Walk back one interval per sample to locate the window's start.
    start_time_epoch = end_time_epoch - (value_count * interval_sec)

    usages = []
    for heading, raw_values in padded.items():
        if heading == TIME:
            continue

        # Column-heading format differs between the kubernetes and mesos
        # backends, so each has its own parser.
        if is_kubernetes():
            w_id, resource_name = parse_kubernetes_csv_usage_heading(heading)
        else:
            w_id, resource_name = parse_mesos_csv_usage_heading(heading)

        # Empty cells mean "no sample"; keep them as NaN so series lengths
        # stay aligned with the time axis.
        values = [float('nan') if x == '' else float(x) for x in raw_values]
        usages.append(ResourceUsage(w_id, resource_name, start_time_epoch,
                                    interval_sec, values))

    return usages
Exemplo n.º 2
0
def get_current_workloads(docker_client):
    """Return workload objects for every container currently running.

    Containers whose environment cannot be read are logged (with
    traceback) and skipped, so one bad container never hides the rest.

    :param docker_client: docker SDK client exposing `containers.list()`
    :return: list of workload objects
    """
    workloads = []
    for container in docker_client.containers.list():
        try:
            if is_kubernetes():
                workloads.append(get_workload_from_kubernetes(container.name))
            else:
                workloads.append(get_workload_from_disk(container.name))
        except Exception:
            # `except Exception` (not bare `except:`) so SystemExit and
            # KeyboardInterrupt still propagate; best-effort skip otherwise.
            log.exception("Failed to read environment for container: '%s'",
                          container.name)

    return workloads
    def __snapshot_usage_raw(self):
        """Query pcp (pmrep) for recent per-container usage and cache the parsed result in self.__usages."""
        try:
            # Avoid making a metrics query on a potentially empty dataset which causes the query command to fail, which
            # causes noisy logs which look like failures.
            workload_manager = get_workload_manager()
            if workload_manager is None or len(
                    workload_manager.get_workloads()) == 0:
                log.info('No workloads so skipping pcp snapshot.')
                return

            if is_kubernetes():
                instance_filter = '.*titus-executor.*.service'
            else:
                # Raw string: '\-' is an invalid escape sequence in a normal
                # string literal (SyntaxWarning on newer Pythons).
                instance_filter = r'/containers.slice/[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}'

            # pmrep -a /var/log/pcp/pmlogger/$(hostname)/ -S -60m -t 1m -y s -o csv -i .*titus-executor.*.service  cgroup.cpuacct.usage cgroup.memory.usage
            snapshot_cmd_fmt = """ pmrep -a {0} \
                    -S -{1}s \
                    -T -0s \
                    -t {2}s \
                    -y s \
                    -o csv \
                    -i {3} \
                    cgroup.cpuacct.usage \
                    cgroup.memory.usage \
                    titus.network.in.bytes \
                    titus.network.out.bytes \
                    titus.disk.bytes_used """

            cmd_str = snapshot_cmd_fmt.format(get_pcp_archive_path(),
                                              self.__relative_start_sec,
                                              self.__interval_sec,
                                              instance_filter)

            log.info('Snapshoting usage from pcp: {}'.format(' '.join(
                cmd_str.split())))

            byte_array = subprocess.check_output(
                cmd_str, shell=True, timeout=self.__query_timeout_sec)
            raw_csv_snapshot = byte_array.decode('utf-8')
            usages = get_resource_usage(raw_csv_snapshot,
                                        self.__interval_count,
                                        self.__interval_sec)

            # Swap the cached usages atomically with respect to readers.
            with self.__lock:
                self.__usages = usages
        except Exception:
            # Catch Exception (not bare except) so SystemExit and
            # KeyboardInterrupt are not swallowed by this best-effort task.
            log.exception("Failed to snapshot pcp data or compute usages")
Exemplo n.º 4
0
def get_current_workloads(docker_client):
    """Return workload objects for every container currently running.

    Containers whose environment cannot be read are logged (with
    traceback) and skipped.

    :param docker_client: docker SDK client exposing `containers.list()`
    :return: list of workload objects
    """
    workloads = []
    for container in docker_client.containers.list():
        workload = None
        try:
            if is_kubernetes():
                workload = get_workload_from_kubernetes(container.name)
            else:
                workload = get_workload_from_disk(container.name)
        except Exception:
            # log.exception keeps the stack trace that log.error drops,
            # which is essential for diagnosing why a container failed.
            log.exception("Failed to read environment for container: '%s'",
                          container.name)

        if workload is not None:
            workloads.append(workload)

    return workloads
    def handle(self, event):
        """Build a workload for a relevant container event and register it
        with the workload manager; irrelevant events are ignored."""
        if not self.__relevant(event):
            return

        container_name = get_container_name(event)

        # How the workload is resolved depends on the scheduler backend.
        workload = (get_workload_from_kubernetes(container_name)
                    if is_kubernetes()
                    else get_workload_from_disk(container_name))

        if workload is None:
            raise Exception('failed to construct workload from event')

        w_id = workload.get_id()
        self.handling_event(event, "adding workload: '{}'".format(w_id))
        self.workload_manager.add_workload(workload)
        self.handled_event(event, "added workload: '{}'".format(w_id))
Exemplo n.º 6
0
    event_manager.start_processing_events()


if __name__ != '__main__' and not is_testing():
    set_config_manager(ConfigManager(EnvPropertyProvider))
    log.info("Configuring logging...")
    gunicorn_logger = logging.getLogger('gunicorn.error')
    app.logger.handlers = gunicorn_logger.handlers
    app.logger.setLevel(gunicorn_logger.level)

    # Set the schedule library's logging level higher so it doesn't spam messages every time it schedules a task
    logging.getLogger('schedule').setLevel(logging.WARN)

    exit_handler = RealExitHandler()

    if is_kubernetes():
        log.info("Setting pod manager...")
        pod_manager = PodManager()
        pod_manager.start()
        set_pod_manager(pod_manager)

    log.info("Setting event log manager...")
    event_log_manager = LocalEventLogManager()
    set_event_log_manager(event_log_manager)

    log.info("Watching property changes for restart...")
    RestartPropertyWatcher(get_config_manager(), exit_handler,
                           RESTART_PROPERTIES)

    log.info("Modeling the CPU...")
    cpu = get_cpu_from_env()