Exemplo n.º 1
0
def run_throughput_inner(query_root, data_dir, generated_query_dir,
                         host, port, database, user, password,
                         stream, num_streams, queue, verbose):
    """Runs one query stream of the throughput test inside a child process.

    Opens its own DB connection (connections cannot be shared across
    processes) and pushes the collected metrics onto the shared queue.

    :param query_root: directory where generated SQL statements are stored
    :param data_dir: subdirectory with data to be loaded (unused here; kept
        for signature compatibility with the Process target invocation)
    :param generated_query_dir: subdirectory with generated queries
    :param host: hostname where the Postgres database is running
    :param port: port number where the Postgres database is listening
    :param database: database name, where the benchmark will be run
    :param user: username of the Postgres user with full access to the benchmark DB
    :param password: password for the Postgres user
    :param stream: stream number
    :param num_streams: number of streams
    :param queue: process queue used to hand results back to the parent
    :param verbose: True if more verbose output is required
    :return: none, uses exit(1) to abort on errors
    """
    # Keep the connection attempt in its own try so that a genuine
    # connection failure is reported as such; previously any exception
    # raised while running the queries was misreported as a connect error.
    try:
        conn = pgdb.PGDB(host, port, database, user, password)
    except Exception as e:
        print("unable to connect to DB for query in stream #%s: %s" % (stream, e))
        exit(1)
    try:
        result = r.Result("ThroughputQueryStream%s" % stream)
        if run_query_stream(conn, query_root, generated_query_dir, stream, num_streams, result, verbose):
            print("unable to finish query in stream #%s" % stream)
            exit(1)
        # Only a fully successful stream reports its metrics.
        queue.put(result)
    except Exception as e:
        print("query stream #%s failed: %s" % (stream, e))
        exit(1)
Exemplo n.º 2
0
def load_tables(data_dir, host, port, db_name, user, password, tables, load_dir):
    """Loads data into tables. Expects that tables are already empty.

    Args:
        data_dir (str): Directory in which load data exists
        host (str): IP/hostname of the PG instance
        port (int): port for the PG instance
        db_name (str): name of the tpch database
        user (str): user for the PG instance
        password (str): password for the PG instance
        tables (str): list of tables
        load_dir (str): directory with data files to be loaded

    Return:
        0 if successful
        non zero otherwise
    """
    try:
        conn = pgdb.PGDB(host, port, db_name, user, password)
    except Exception as e:
        print("unable to connect to the database. %s" % e)
        return 1
    try:
        for table in tables:
            # Data files are named after the table, e.g. LINEITEM -> lineitem.tbl.csv
            filepath = os.path.join(data_dir, load_dir, table.lower() + ".tbl.csv")
            conn.copyFrom(filepath, separator="|", table=table)
        conn.commit()
    except Exception as e:
        print("unable to run load tables. %s" % e)
        return 1
    finally:
        # Close on every path; previously the connection leaked when the
        # COPY failed and the function returned 1 without closing.
        conn.close()
    return 0
Exemplo n.º 3
0
def clean_database(query_root, host, port, db_name, user, password, tables):
    """Drops the tables if they exist

    Args:
        query_root (str): Directory in which generated queries directory exists
        host (str): IP/hostname of the PG instance
        port (int): port for the PG instance
        db_name (str): name of the tpch database
        user (str): user for the PG instance
        password (str): password for the PG instance
        tables (str): list of tables

    Return:
        0 if successful
        non zero otherwise
    """
    try:
        conn = pgdb.PGDB(host, port, db_name, user, password)
    except Exception as e:
        print("unable to connect to the database. %s" % e)
        return 1
    try:
        for table in tables:
            # NOTE(review): identifiers cannot be bound as query parameters;
            # table names come from the benchmark's fixed table list, not
            # from untrusted input.
            conn.executeQuery("DROP TABLE IF EXISTS %s " % table)
        print("dropped existing tables")
        conn.commit()
    except Exception as e:
        print("unable to remove existing tables. %s" % e)
        return 1
    finally:
        # Close on every path; previously the connection leaked when a
        # DROP failed and the function returned 1 without closing.
        conn.close()
    return 0
Exemplo n.º 4
0
def create_schema(query_root, host, port, db_name, user, password, prep_query_dir):
    """Creates the schema for the tests. Drops the tables if they exist

    Args:
        query_root (str): Directory in which generated queries directory exists
        host (str): IP/hostname of the PG instance
        port (int): port for the PG instance
        db_name (str): name of the tpch database
        user (str): user for the PG instance
        password (str): password for the PG instance
        prep_query_dir (str): directory with queries for schema creation

    Return:
        0 if successful
        non zero otherwise
    """
    try:
        conn = pgdb.PGDB(host, port, db_name, user, password)
    except Exception as e:
        print("unable to connect to the database. %s" % e)
        return 1
    try:
        conn.executeQueryFromFile(os.path.join(query_root, prep_query_dir, "create_tbl.sql"))
        conn.commit()
    except Exception as e:
        print("unable to run create tables. %s" % e)
        return 1
    finally:
        # Close on every path; previously the connection leaked on failure.
        conn.close()
    # Explicit success code; the original fell off the end returning None,
    # inconsistent with the documented "0 if successful" contract used by
    # every sibling function.
    return 0
Exemplo n.º 5
0
def index_tables(query_root, host, port, db_name, user, password, prep_query_dir):
    """Creates indexes and foreign keys for loaded tables.

    Args:
        query_root (str): Directory in which preparation queries directory exists
        host (str): IP/hostname of the PG instance
        port (int): port for the PG instance
        db_name (str): name of the tpch database
        user (str): user for the PG instance
        password (str): password for the PG instance
        prep_query_dir (str): directory with create index script

    Return:
        0 if successful
        non zero otherwise
    """
    try:
        conn = pgdb.PGDB(host, port, db_name, user, password)
    except Exception as e:
        print("unable to connect to the database. %s" % e)
        return 1
    try:
        conn.executeQueryFromFile(os.path.join(query_root, prep_query_dir, "create_idx.sql"))
        conn.commit()
    except Exception as e:
        print("unable to run index tables. %s" % e)
        return 1
    finally:
        # Close on every path; previously the connection leaked when the
        # index script failed and the function returned 1 without closing.
        conn.close()
    return 0
Exemplo n.º 6
0
def run_power_test(query_root, data_dir, update_dir, delete_dir, generated_query_dir, results_dir,
                   host, port, database, user, password,
                   run_timestamp, num_streams, verbose, read_only):
    """Runs the single-stream power test: refresh 1, query stream, refresh 2.

    :param query_root: directory where generated SQL statements are stored
    :param data_dir: subdirectory with data to be loaded
    :param update_dir: subdirectory with data to be updated
    :param delete_dir: subdirectory with data to be deleted
    :param generated_query_dir: subdirectory with generated queries
    :param results_dir: path to the results folder
    :param host: hostname where the Postgres database is running
    :param port: port number where the Postgres database is listening
    :param database: database name, where the benchmark will be run
    :param user: username of the Postgres user with full access to the benchmark DB
    :param password: password for the Postgres user
    :param run_timestamp: name of the run folder, format run_YYYYMMDD_HHMMSS
    :param num_streams: number of streams
    :param verbose: True if more verbose output is required
    :param read_only: True if no inserts/updates/deletes are to be run; can be used to run the same test multiple times
    without (re)loading the data, e.g. while developing
    :return: 0 if successful, 1 otherwise
    """
    try:
        print("Power tests started ...")
        conn = pgdb.PGDB(host, port, database, user, password)
        try:
            result = r.Result("Power")
            stream = 0  # constant for power tests
            # refresh function 1 (skipped in read-only mode, but still timed
            # so the metric exists on every run)
            result.startTimer()
            if not read_only and refresh_func1(conn, data_dir, update_dir, stream, num_streams, verbose):
                return 1
            result.setMetric(REFRESH_METRIC % (stream, 1), result.stopTimer())
            # the single power query stream
            if run_query_stream(conn, query_root, generated_query_dir, stream, num_streams, result, verbose):
                return 1
            # refresh function 2
            result.startTimer()
            if not read_only and refresh_func2(conn, data_dir, delete_dir, stream, num_streams, verbose):
                return 1
            result.setMetric(REFRESH_METRIC % (stream, 2), result.stopTimer())
            print("Power tests finished.")
            if verbose:
                result.printMetrics()
            result.saveMetrics(results_dir, run_timestamp, "power")
        finally:
            # Close on every path; previously the connection was never closed.
            conn.close()
    except Exception as e:
        print("unable to run power tests. DB connection failed: %s" % e)
        return 1
    return 0
Exemplo n.º 7
0
def run_throughput_test(query_root, data_dir, update_dir, delete_dir, generated_query_dir, results_dir,
                        host, port, database, user, password,
                        run_timestamp, num_streams, verbose, read_only):
    """Runs the throughput test: query streams in child processes, with the
    refresh functions executed serially in the parent process.

    :param query_root: directory where generated SQL statements are stored
    :param data_dir: subdirectory with data to be loaded
    :param update_dir: subdirectory with data to be updated
    :param delete_dir: subdirectory with data to be deleted
    :param generated_query_dir: subdirectory with generated queries
    :param results_dir: path to the results folder
    :param host: hostname where the Postgres database is running
    :param port: port number where the Postgres database is listening
    :param database: database name, where the benchmark will be run
    :param user: username of the Postgres user with full access to the benchmark DB
    :param password: password for the Postgres user
    :param run_timestamp: name of the run folder, format run_YYYYMMDD_HHMMSS
    :param num_streams: number of streams
    :param verbose: True if more verbose output is required
    :param read_only: True if no inserts/updates/deletes are to be run; can be used to run the same test multiple times
    without (re)loading the data, e.g. while developing
    :return: 0 if successful, 1 otherwise
    """
    # Local import: multiprocessing.Queue raises queue.Empty on a
    # non-blocking get from an empty queue.
    from queue import Empty
    try:
        print("Throughput tests started ...")
        conn = pgdb.PGDB(host, port, database, user, password)
        total = r.Result("ThroughputTotal")
        total.startTimer()
        # Launch one child process per query stream; each opens its own
        # connection and puts its Result on the shared queue.
        processes = []
        queue = Queue()
        for i in range(num_streams):
            stream = i + 1
            print("Throughput tests in stream #%s started ..." % stream)
            p = Process(target=run_throughput_inner,
                        args=(query_root, data_dir, generated_query_dir,
                              host, port, database, user, password,
                              stream, num_streams, queue, verbose))
            processes.append(p)
            p.start()
        # Refresh functions run serially in this process, one pair per stream.
        result = r.Result("ThroughputRefreshStream")
        for i in range(num_streams):
            stream = i + 1
            result.startTimer()
            if not read_only and refresh_func1(conn, data_dir, update_dir, stream, num_streams, verbose):
                return 1
            result.setMetric(REFRESH_METRIC % (stream, 1), result.stopTimer())
            result.startTimer()
            if not read_only and refresh_func2(conn, data_dir, delete_dir, stream, num_streams, verbose):
                return 1
            result.setMetric(REFRESH_METRIC % (stream, 2), result.stopTimer())
        queue.put(result)
        for p in processes:
            p.join()
        print("Throughput tests finished.")
        # Drain every result the streams produced. Queue.qsize() is not
        # implemented on all platforms (it raises NotImplementedError on
        # macOS), so loop with a non-blocking get until the queue is empty.
        # After join() each child's queue feeder thread has flushed, so no
        # results can arrive after Empty is raised.
        while True:
            try:
                res = queue.get(False)
            except Empty:
                break
            if verbose:
                res.printMetrics()
            res.saveMetrics(results_dir, run_timestamp, THROUGHPUT)
        total.setMetric(THROUGHPUT_TOTAL_METRIC, total.stopTimer())
        if verbose:
            total.printMetrics()
        total.saveMetrics(results_dir, run_timestamp, THROUGHPUT)
        conn.close()  # previously the parent connection was never closed
    except Exception as e:
        print("unable to execute throughput tests: %s" % e)
        return 1
    return 0