Exemplo n.º 1
0
def venv_info(bin_dir):
    """Return the packages and Python version of a virtualenv.

    ``bin_dir`` is the directory holding the virtualenv's ``pip`` and
    ``python`` executables, with or without a trailing slash. An empty
    ``bin_dir`` no longer raises ``IndexError``: the plain ``pip`` and
    ``python`` commands (without a directory prefix) are run instead.

    Returns what ``parse_freeze`` makes of ``pip freeze --all``, extended
    with a ``"python"`` key holding the parsed ``python --version`` result.
    """
    # endswith() is safe on an empty string, unlike indexing with [-1].
    if bin_dir and not bin_dir.endswith("/"):
        bin_dir += "/"

    freeze_output, _ = get_output(bin_dir + "pip freeze --all")
    pkgs = parse_freeze(freeze_output)

    # Both streams are handed to the parser, which decides which one to use.
    version_output, version_error = get_output(bin_dir + "python --version")
    pkgs["python"] = parse_python_version(version_output, version_error)
    return pkgs
Exemplo n.º 2
0
def pipenv_info(directory):
    """Return the packages and Python version of the pipenv in *directory*.

    An error is logged and ``None`` is returned when ``pipenv --where`` does
    not report exactly the absolute path of *directory*.
    """
    project_dir = os.path.abspath(directory)
    # Run pipenv as a module of the interpreter that is executing this code.
    pipenv_cmd = " ".join([sys.executable, "-m", "pipenv"])

    def run_pipenv(arguments):
        # Every pipenv call runs from within the project directory.
        return get_output(pipenv_cmd + " " + arguments, cwd=project_dir)

    where_output, _ = run_pipenv("--where")
    if where_output.strip() != project_dir:
        logger.error("No pipenv found in %s", project_dir)
        return None

    freeze_output, _ = run_pipenv("run pip freeze --all")
    packages = parse_freeze(freeze_output)

    version_output, version_error = run_pipenv("run python --version")
    packages["python"] = parse_python_version(version_output, version_error)
    return packages
Exemplo n.º 3
0
def _database_infos():
    """Return a dict mapping each database name to its info dict.

    The names and sizes come from ``psql`` (run as the ``postgres`` user).
    Databases named ``postgres`` or starting with ``template`` are left out.
    Every info dict is a deep copy of ``DATABASE_TEMPLATE`` with its
    ``"name"`` and ``"size"`` entries filled in.
    """
    sql = "select datname, pg_database_size(datname) from pg_database;"
    psql_command = "sudo -u postgres psql -c '%s' --tuples-only" % sql
    stdout, stderr = get_output(psql_command)
    if stderr:
        logger.warning("Error output from psql command: %s", stderr)

    databases = {}
    for row in stdout.split("\n"):
        # Only "name | size" rows are of interest.
        if "|" not in row:
            continue
        columns = row.split("|")
        name, raw_size = columns[0].strip(), columns[1].strip()
        if name == "postgres" or name.startswith("template"):
            logger.debug("Omitting database %s", name)
            continue
        # pg_database_size() reports bytes; the log line converts to MB.
        size = int(raw_size)
        info = copy.deepcopy(DATABASE_TEMPLATE)
        info["name"] = name
        info["size"] = size
        databases[name] = info
        megabytes = size / 1024 / 1024
        logger.info(
            "Found database %s with size %s (%s MB)", name, size, megabytes
        )
    return databases
Exemplo n.º 4
0
def python_details(container):
    """Return the python packages found inside a running docker container.

    The interpreter is picked from the container's command: the first entry
    of ``PYTHON_EXEC_OPTIONS`` that occurs as a word in the command wins.
    Otherwise ``python`` next to the command's executable is used, or
    ``python3`` when the executable has no directory part.

    Returns ``{"eggs": {...}}`` (including a ``"python"`` version entry), or
    an empty dict when no Python could be run inside the container.
    """
    # Identify the python interpreter inside the docker.
    words = container["command"].strip('"').split(" ")
    python_exec = next(
        (option for option in PYTHON_EXEC_OPTIONS if option in words), None
    )
    if python_exec is None:
        # It is some other command (gunicorn; bin/gunicorn).
        exec_dir = os.path.dirname(words[0])
        if exec_dir:
            python_exec = os.path.join(exec_dir, "python")
        else:
            python_exec = "python3"  # global default
    docker_prefix = " ".join(["docker exec", container["id"], python_exec, ""])

    def run_python(arguments):
        # Log and run the interpreter inside the container; return stdout.
        logger.debug(
            "Running %s %s in container '%s'..",
            python_exec,
            arguments,
            container["names"],
        )
        stdout, _ = get_output(docker_prefix + arguments,
                               fail_on_exit_code=False)
        return stdout

    # Identify the python version.
    version_output = run_python("--version")
    exec_failed = version_output.startswith(DOCKER_EXEC_ERROR)
    if exec_failed or version_output.startswith("Traceback"):
        logger.info("Did not find Python in docker %s", container["names"])
        return {}
    python_version = parse_python_version(version_output, "")
    logger.info(
        "Found Python %s ('%s') in container '%s'..",
        python_version,
        python_exec,
        container["names"],
    )

    # Identify the python packages (eggs).
    freeze_output = run_python("-m pip freeze --all")
    if freeze_output.startswith(DOCKER_EXEC_ERROR):
        # Only warn: the output is still handed to the parser below.
        logger.warning("Error output from pip freeze in docker: %s",
                       freeze_output)
    packages = parse_freeze(freeze_output)
    packages["python"] = python_version

    return {"eggs": packages}
Exemplo n.º 5
0
def eggs_info(directory):
    """Return ``{project name: version}`` for the buildout in *directory*.

    The first non-empty script among ``bin/django``, ``bin/test`` and
    ``bin/python`` is used. Its leading lines (the ``sys.path`` setup) are
    executed so that ``sys.path`` gets extended; every resulting path entry
    is then searched for a distribution (only the first one found per entry
    is recorded). The interpreter from the script's shebang line is run with
    ``--version`` to fill the ``"python"`` key: ``"UNKNOWN"`` when that
    output cannot be parsed, ``None`` when no script was found.

    Returns ``None`` when *directory* has no ``bin`` subdirectory.

    ``sys.path`` is always restored, also when executing the script fails.

    NOTE(security): this ``exec``s code read from the script, so it must
    only be pointed at trusted directories.
    """
    files_of_interest = ["django", "test", "python"]
    possible_egg_dirs = set()
    python_version = None
    bin_dir = os.path.join(directory, "bin")
    if not os.path.exists(bin_dir):
        return

    bin_dir_contents = os.listdir(bin_dir)
    before = copy.copy(sys.path)
    try:
        for file_ in files_of_interest:
            if file_ not in bin_dir_contents:
                continue
            if possible_egg_dirs:
                logger.debug("Omitting bin/%s, we already have our info",
                             file_)
                continue
            logger.debug("Looking in bin/%s for eggs+versions", file_)
            # Context manager: the original left the file handle open.
            with open(os.path.join(bin_dir, file_)) as script:
                lines = script.readlines()
            if not lines:
                # Nothing to learn from an empty script (and no shebang).
                logger.debug("bin/%s is empty, skipping it", file_)
                continue
            new_contents = []
            for line in lines:
                # Skipping imports that may be unavailable in the current
                # path; a bare "import sys" is needed by the path setup.
                if line.strip() != "import sys":
                    # When we see these lines we have moved past the
                    # sys.path setup:
                    if ("import " in line or "os.chdir" in line
                            or "__import__" in line
                            or "_interactive = True" in line):
                        break
                new_contents.append(line)
            # Only the lines collected above are executed; running them is
            # what extends sys.path with the script's directories.
            exec("".join(new_contents))
            possible_egg_dirs.update(sys.path)
            # Detect python executable from the shebang line.
            first_line = lines[0].strip()
            python_executable = first_line.lstrip("#!")
            output, error = get_output("%s --version" % python_executable,
                                       cwd=directory)
            try:
                total_output = output + error
                python_version = total_output.strip().split()[1]
                # ^^^ stdout (3) / stderr (2) outputs "Python 2.7.10"
            except IndexError:
                python_version = "UNKNOWN"
            logger.debug("Python version used: %s", python_version)
    finally:
        # Reset sys.path, even when something above raised.
        sys.path = before

    eggs = {}
    for dir_ in possible_egg_dirs:
        distributions = list(pkg_resources.find_distributions(dir_, only=True))
        if not distributions:
            continue
        first = distributions[0]
        eggs[first.project_name] = first.version
    # "Python" is the version we run with, it seems; drop it.
    eggs.pop("Python", None)
    eggs["python"] = python_version
    return eggs
Exemplo n.º 6
0
def _postgres_version():
    """Return the postgres version found in the ``ps ax`` output.

    The first line that ``POSTGRES_VERSION`` matches from its start provides
    the ``version`` group. An empty string is returned when no line matches.
    """
    ps_output, _ = get_output("ps ax")
    for process_line in ps_output.split("\n"):
        found = POSTGRES_VERSION.match(process_line)
        if found:
            return found.group("version")
    return ""
Exemplo n.º 7
0
def git_info(directory):
    """Return git information (like remote repo) for the directory

    Returns ``None`` (after logging a warning) when *directory* has no
    ``.git`` entry. Otherwise a dict with:

    - ``url``: github URL built from the last recognized ``git remote -v``
      line (absent when no line matches ``GIT_URL``).
    - ``release``: ``"master"`` or ``"main"`` when that word occurs in the
      ``git status`` output, otherwise the first line of ``git describe``.
    - ``has_local_modifications`` / ``has_untracked_files``: booleans derived
      from the ``git status`` output.
    """
    logger.debug("Looking in %s...", directory)
    data = {}
    if ".git" not in os.listdir(directory):
        logger.warning("No .git directory found in %s", directory)
        return

    remotes_output, _ = get_output("git remote -v", cwd=directory)
    for line in remotes_output.split("\n"):
        if not line:
            continue
        match = GIT_URL.search(line)
        if not match:
            logger.warning("Non-recognized 'git remote -v' line: %s", line)
            continue

        data["url"] = "https://github.com/{user}/{project}".format(
            user=match.group("user"), project=match.group("project"))
        logger.debug("Git repo found: %s", data["url"])

    # Keep the status output in its own variable: it is needed again at the
    # end and must not be clobbered by the "git describe" output below.
    status_output, _ = get_output("git status", cwd=directory)
    status_output = status_output.lower()
    if "master" in status_output:
        data["release"] = "master"
        logger.debug("It is a master checkout")
    elif "main" in status_output:
        data["release"] = "main"
        logger.debug("It is a 'main' checkout")
    else:
        describe_output, _ = get_output("git describe", cwd=directory)
        first_line = describe_output.split("\n")[0]
        data["release"] = first_line.strip()
        logger.debug("We're on a tag or branch: %s", data["release"])
    data["has_local_modifications"] = "changes not staged" in status_output
    data["has_untracked_files"] = "untracked" in status_output
    return data
Exemplo n.º 8
0
def django_info_buildout(bin_django):
    """Return the parsed ``diffsettings`` output of a buildout django script.

    The script is run through ``sudo`` as the user that owns *bin_django*.
    ``None`` is returned when the command produced error output and nothing
    on stdout.
    """
    owner_uid = os.stat(bin_django).st_uid
    # Corner case when something needs matplotlib in django's settings.
    env_prefix = "MPLCONFIGDIR=/tmp"
    diffsettings_command = "sudo -u \\#%s %s %s diffsettings" % (
        owner_uid,
        env_prefix,
        bin_django,
    )
    logger.debug("Running %s diffsettings...", bin_django)
    stdout, stderr = get_output(diffsettings_command)
    if stderr:
        logger.warning("Error output from diffsettings command: %s", stderr)
    if stderr and not stdout:
        # Errors only and nothing to parse.
        return None
    return parse_django_info(stdout)
Exemplo n.º 9
0
def container_details():
    """Return a list with one dict of details per running container.

    The dict keys are the lowercased ``DOCKER_PS_FIELDS``; the values are the
    tab-separated columns of each ``docker ps`` output line. For the fields
    that docker ps can return, see
    https://docs.docker.com/engine/reference/commandline/ps/.

    An empty list is returned when the command produced error output.
    """
    ps_command = "docker ps --no-trunc --format '{}'".format(DOCKER_PS_FORMAT)
    logger.debug("Running 'docker ps'...")
    stdout, stderr = get_output(ps_command, fail_on_exit_code=False)
    if stderr:
        logger.warning("Error output from docker command: %s", stderr)
        return []

    field_names = [field.lower() for field in DOCKER_PS_FIELDS]
    containers = []
    for row in stdout.split("\n"):
        if not row:
            continue
        containers.append(dict(zip(field_names, row.split("\t"))))
    return containers
Exemplo n.º 10
0
def all_info():
    """Return the info we want to extract from docker.

    The counts come from the ACTIVE column of ``docker system df``, whose
    output looks like this::

      $ docker system df

      TYPE                TOTAL               ACTIVE              SIZE                RECLAIMABLE
      Images              50                  2                   16.66 GB            16.13 GB (96%)
      Containers          2                   2                   70 B                0 B (0%)
      Local Volumes       3                   3                   123 MB              0 B (0%)

    Returns a copy of ``DOCKER_TEMPLATE`` with ``active_images``,
    ``active_containers`` and ``active_volumes`` filled in where they could
    be parsed, plus ``containers``: the ``container_details()`` list in
    which every container got a ``"python"`` entry from ``python_details``.

    An empty dict is returned when the output has no header line containing
    the ACTIVE column.
    """
    result = DOCKER_TEMPLATE.copy()
    command = "docker system df"
    logger.debug("Running '%s'...", command)
    output, error = get_output(command, fail_on_exit_code=False)
    if error:
        logger.warning("Error output from docker command: %s", error)
    lines = [line.strip() for line in output.split("\n")]
    lines = [line.lower() for line in lines if line]
    if not lines or "active" not in lines[0]:
        return {}
    # The values are left-aligned under their column header.
    start_column = lines[0].find("active")
    for line in lines[1:]:
        raw_count = line[start_column:start_column + 4].strip()
        try:
            count = int(raw_count)
        except ValueError:
            # Log the text that failed to parse (previously it was replaced
            # by "unknown" before being logged) and skip this line.
            logger.exception("Couldn't parse int: %r", raw_count)
            continue
        if "images" in line:
            result["active_images"] = count
        if "containers" in line:
            result["active_containers"] = count
        if "volumes" in line:
            result["active_volumes"] = count
    result["containers"] = container_details()
    for container in result["containers"]:
        container["python"] = python_details(container)
    logger.info("Found %d active docker containers",
                result["active_containers"])
    return result
Exemplo n.º 11
0
def supervisorctl_warnings(supervisorctl_command):
    """Return the number of processes that supervisorctl reports as not running.

    Status lines containing one of the ``SUPERVISOR_CRONJOB_EXCEPTIONS`` are
    ignored. Every remaining line without "running" (case-insensitive) counts
    as a warning and is logged.
    """
    status_command = "%s status" % supervisorctl_command
    logger.debug("Running '%s'...", status_command)
    stdout, stderr = get_output(status_command)
    if stderr:
        logger.warning("Error output from supervisorctl command: %s", stderr)

    not_running = []
    for raw_line in stdout.split("\n"):
        status_line = raw_line.strip()
        if not status_line:
            continue
        if any(ignored in status_line
               for ignored in SUPERVISOR_CRONJOB_EXCEPTIONS):
            continue
        if "running" in status_line.lower():
            continue
        not_running.append(status_line)

    if not_running:
        logger.warning("Some processes in %s aren't running:",
                       supervisorctl_command)
        for status_line in not_running:
            logger.warning("    %s", status_line)
    return len(not_running)
Exemplo n.º 12
0
def django_info_pipenv(directory):
    """Return the parsed ``diffsettings`` output of a pipenv django project.

    ``pipenv run python manage.py diffsettings`` is run through ``sudo`` as
    the user that owns ``manage.py`` in *directory*. ``None`` is returned
    when the command produced error output and nothing on stdout.

    The process' working directory is temporarily changed to *directory* and
    is always restored afterwards, also when ``manage.py`` is missing or the
    command raises.
    """
    # Resolve before chdir(): a relative path would otherwise be interpreted
    # a second time (relative to the new working directory) by ``cwd=``.
    directory = os.path.abspath(directory)
    matplotlibenv = "MPLCONFIGDIR=/tmp"
    # ^^^ Corner case when something needs matplotlib in django's settings.
    django_script = "pipenv run python manage.py"

    original_dir = os.getcwd()
    os.chdir(directory)
    try:
        target_user_id = os.stat("manage.py").st_uid
        command = "sudo -u \\#%s %s %s diffsettings" % (
            target_user_id,
            matplotlibenv,
            django_script,
        )
        output, error = get_output(
            command, cwd=directory, fail_on_exit_code=False
        )
    finally:
        os.chdir(original_dir)

    if error:
        logger.warning("Error output from diffsettings command: %s", error)
        if not output:
            return
    return parse_django_info(output)
Exemplo n.º 13
0
def _table_bloat(database_names):
    query = """
-- btree index stats query
-- estimates bloat for btree indexes
WITH btree_index_atts AS (
    SELECT nspname,
        indexclass.relname as index_name,
        indexclass.reltuples,
        indexclass.relpages,
        indrelid, indexrelid,
        indexclass.relam,
        tableclass.relname as tablename,
        regexp_split_to_table(indkey::text, ' ')::smallint AS attnum,
        indexrelid as index_oid
    FROM pg_index
    JOIN pg_class AS indexclass ON pg_index.indexrelid = indexclass.oid
    JOIN pg_class AS tableclass ON pg_index.indrelid = tableclass.oid
    JOIN pg_namespace ON pg_namespace.oid = indexclass.relnamespace
    JOIN pg_am ON indexclass.relam = pg_am.oid
    WHERE pg_am.amname = 'btree' and indexclass.relpages > 0
         AND nspname NOT IN ('pg_catalog','information_schema')
    ),
index_item_sizes AS (
    SELECT
    ind_atts.nspname, ind_atts.index_name,
    ind_atts.reltuples, ind_atts.relpages, ind_atts.relam,
    indrelid AS table_oid, index_oid,
    current_setting('block_size')::numeric AS bs,
    8 AS maxalign,
    24 AS pagehdr,
    CASE WHEN max(coalesce(pg_stats.null_frac,0)) = 0
        THEN 2
        ELSE 6
    END AS index_tuple_hdr,
    sum( (1-coalesce(pg_stats.null_frac, 0)) * coalesce(pg_stats.avg_width, 1024) ) AS nulldatawidth
    FROM pg_attribute
    JOIN btree_index_atts AS ind_atts ON pg_attribute.attrelid = ind_atts.indexrelid AND pg_attribute.attnum = ind_atts.attnum
    JOIN pg_stats ON pg_stats.schemaname = ind_atts.nspname
          -- stats for regular index columns
          AND ( (pg_stats.tablename = ind_atts.tablename AND pg_stats.attname = pg_catalog.pg_get_indexdef(pg_attribute.attrelid, pg_attribute.attnum, TRUE))
          -- stats for functional indexes
          OR   (pg_stats.tablename = ind_atts.index_name AND pg_stats.attname = pg_attribute.attname))
    WHERE pg_attribute.attnum > 0
    GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9
),
index_aligned_est AS (
    SELECT maxalign, bs, nspname, index_name, reltuples,
        relpages, relam, table_oid, index_oid,
        coalesce (
            ceil (
                reltuples * ( 6
                    + maxalign
                    - CASE
                        WHEN index_tuple_hdr%maxalign = 0 THEN maxalign
                        ELSE index_tuple_hdr%maxalign
                      END
                    + nulldatawidth
                    + maxalign
                    - CASE /* Add padding to the data to align on MAXALIGN */
                        WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
                        ELSE nulldatawidth::integer%maxalign
                      END
                )::numeric
              / ( bs - pagehdr::NUMERIC )
              +1 )
         , 0 )
      as expected
    FROM index_item_sizes
),
raw_bloat AS (
    SELECT current_database() as dbname, nspname, pg_class.relname AS table_name, index_name,
        bs*(index_aligned_est.relpages)::bigint AS totalbytes, expected,
        CASE
            WHEN index_aligned_est.relpages <= expected
                THEN 0
                ELSE bs*(index_aligned_est.relpages-expected)::bigint
            END AS wastedbytes,
        CASE
            WHEN index_aligned_est.relpages <= expected
                THEN 0
                ELSE bs*(index_aligned_est.relpages-expected)::bigint * 100 / (bs*(index_aligned_est.relpages)::bigint)
            END AS realbloat,
        pg_relation_size(index_aligned_est.table_oid) as table_bytes,
        stat.idx_scan as index_scans
    FROM index_aligned_est
    JOIN pg_class ON pg_class.oid=index_aligned_est.table_oid
    JOIN pg_stat_user_indexes AS stat ON index_aligned_est.index_oid = stat.indexrelid
),
format_bloat AS (
SELECT dbname as database_name, nspname as schema_name, table_name,
        round(realbloat) as bloat_pct, round(wastedbytes/(1024^2)::NUMERIC) as bloat_mb
FROM raw_bloat
)
-- final query outputting the bloated indexes
-- change the where and order by to change
-- what shows up as bloated
SELECT *
FROM format_bloat
WHERE ( bloat_pct > 20 and bloat_mb > 10 )
ORDER BY bloat_mb DESC;
    """
    result = []
    for database_name in database_names:
        command = 'sudo -u postgres psql -c "%s" --tuples-only %s' % (
            query,
            database_name,
        )

        output, error = get_output(command)
        if error:
            logger.warning("Error output from psql command: %s", error)
        for line in output.split("\n"):
            #  database_name | schema_name | table_name | bloat_pct | bloat_mb
            if "|" not in line:
                continue
            parts = [part.strip() for part in line.split("|")]
            name = ":".join([parts[0].strip(), parts[1], parts[2]])
            percentage = parts[3]
            mb = parts[4]
            result.append({"name": name, "percentage": percentage, "mb": mb})
            logger.info("Table %s has %s%% bloat (%sMB)", name, percentage, mb)

    return result