Пример #1
0
    def prep_e_tables(self):
        (ok, result) = psql.runfile(ifile = self.scripts_dir + os.sep + 'prep_database.sql', dbname = self.database_name)
        self.output('------ create schema ------\n' + '\n'.join(result))
        if not ok or str(result).find('ERROR') != -1:
            return False
        
        self.port = self.getOpenPort()
        hostname = socket.gethostname()
        
        with open(self.scripts_dir + os.sep + 'prep_external_tables.sql', 'r') as f:
            sql = f.read()
            sql = sql.replace('//HOST:PORT','//%s:%s' % (hostname, self.port) )
        with open(self.tmp_folder + os.sep + 'prep_external_tables.sql', 'w') as f:
            f.write(sql)

        self.output('------ prep_external_tables ------')
        (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + 'prep_external_tables.sql', dbname = self.database_name, flag = '-e')
        self.output('/n'.join(result))

        if ok and str(result).find('ERROR') == -1:
            cmd = 'gpfdist -d %s -p %s -l %s/fdist.%s.log &' % (self.tmp_folder, self.port, self.tmp_folder, self.port)
            self.output(cmd)
            result = os.system(cmd)
            self.output(str(result))
            return True    
        else:
            self.output('prep_external_tables error. ')
            return False
Пример #2
0
    def grand_revoke_privileges(self, filename = ''):
        self.output('-- Start exec %s for database %s' % (filename, self.database_name))
        if self.run_workload_flag and self.user != 'gpadmin':
            with open(self.workload_directory + '/data/' + filename , 'r') as f:
                query = f.read()
            if gl.suffix:
                query = query.replace('TABLESUFFIX', self.tbl_suffix)
            else:
                query = query.replace('_TABLESUFFIX', '')
            query = query.replace('ROLENAME', self.user)

            file_path = self.tmp_folder + os.sep + '%s_%s_' % (self.database_name, self.user) + filename
            with open(file_path, 'w') as f:
                f.write(query)
            for retry in range(0, 20):
                time.sleep(random.randint(1, 10))
                (ok, output) = psql.runfile(ifile = file_path, dbname = self.database_name, username = '******', flag = '-t -A')
                if not ok or str(output).find('ERROR:') != -1 or str(output).find('FATAL:') != -1 or str(output).find('psql:') != -1:
                    self.output("Retry %d times:" %retry)
                    self.output(query)
                    self.output('\n'.join(output))
                else:
                    break
            self.output(query)
            self.output('\n'.join(output))
        self.output('-- Complete exec %s for database %s' % (filename, self.database_name))
Пример #3
0
    def grand_revoke_privileges(self, filename = ''):
        self.output('-- Start exec %s for database %s' % (filename, self.database_name))
        if self.run_workload_flag and self.user != 'gpadmin':
            with open(self.workload_directory + '/data-hawq/' + filename , 'r') as f:
                query = f.read()
            if gl.suffix:
                query = query.replace('TABLESUFFIX', self.tbl_suffix)
            else:
                query = query.replace('_TABLESUFFIX', '')
            query = query.replace('ROLENAME', self.user)

            file_path = self.tmp_folder + os.sep + '%s_%s_' % (self.database_name, self.user) + filename
            with open(file_path, 'w') as f:
                f.write(query)
            for retry in range(0, 20):
                time.sleep(random.randint(1, 10))
                (ok, output) = psql.runfile(ifile = file_path, dbname = self.database_name, flag = '-t -A')
                if not ok or str(output).find('ERROR:') != -1 or str(output).find('FATAL:') != -1 or str(output).find('psql:') != -1:
                    self.output("Retry %d times:" %retry)
                    self.output(query)
                    self.output('\n'.join(output))
                else:
                    break
            self.output(query)
            self.output('\n'.join(output))
        self.output('-- Complete exec %s for database %s' % (filename, self.database_name))
Пример #4
0
    def load_data(self):
        self.output('-- generate data file: %s' % (self.fname))
        cmd = "dbgen -b %s -s 1 -T L > %s " % (self.dss, self.fname)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        if status != 0:
            print("generate data file %s error. " % (self.fname))
            sys.exit(2)
        self.output('generate data file successed. ')
        
        if self.load_data_flag == False:
            return
        self.output('-- Start loading data')
        super(Copy,self).load_data()

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output('ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists' % (data_directory))
            return
        
        table_name = 'lineitem_copy'
        with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
               cmd = f.read()
               cmd = self.replace_sql(sql = cmd, table_name = table_name)

        with open(self.tmp_folder + os.sep + 'copy_create.sql', 'w') as f:
            f.write(cmd)
            if self.user != 'gpadmin':
                f.write('GRANT ALL ON TABLE %s TO %s;' % (self.table_name, self.user))
        self.output(cmd)
        (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + 'copy_create.sql', dbname = self.database_name, flag = '-t -A')
        self.output('\n'.join(result))
        self.output('-- Complete loading data')      
Пример #5
0
    def run_queries_dir(self, tables, data_directory_suffix,
                        success_criteria_str, test_type):
        hawq_data_directory = self.workload_directory + os.sep + data_directory_suffix
        # run all sql in each loading data file
        for table_name in tables:
            con_id = -1
            if self.continue_flag:
                with open(
                        os.path.join(hawq_data_directory, table_name + '.sql'),
                        'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql=cmd,
                                       table_name=table_name,
                                       query_type=test_type)

                with open(
                        self.tmp_folder + os.sep + data_directory_suffix +
                        '-' + table_name + '.sql', 'w') as f:
                    f.write(cmd)

                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep +
                                            data_directory_suffix + '-' +
                                            table_name + '.sql',
                                            dbname=self.database_name,
                                            username='******',
                                            flag='-t -A')
                end_time = datetime.now()
                self.output(result[0].split('***')[0])

                if ok and str(result).find('ERROR') == -1 and str(result).find(
                        'FATAL'
                ) == -1 and str(result).find(success_criteria_str) != -1:
                    status = 'SUCCESS'
                    con_id = -1
                else:
                    status = 'ERROR'
                    beg_time = datetime.now()
                    end_time = beg_time
                    self.continue_flag = False
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time

            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time).split('.')[0]
            end_time = str(end_time).split('.')[0]

            self.output(
                '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                % (table_name, 1, 1, status, duration))
            self.report_sql(
                "INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                % (self.tr_id, self.s_id, con_id, table_name, status, beg_time,
                   end_time, duration, self.adj_s_id))
Пример #6
0
    def load_data(self):
        if self.load_data_flag == False:
            return
        self.output('-- Start loading data')
        super(Sri, self).load_data()

        # create table and sequence
        if self.distributed_randomly:
            cmd = 'drop sequence sri_seg;create sequence sri_seq;drop table if exists %s;\n' % (
                self.table_name
            ) + 'create table %s (tid bigint, bdate date, aid int, delta int, mtime timestamp) with (%s) ' % (
                self.table_name, self.sql_suffix)
        else:
            cmd = 'drop sequence sri_seg;create sequence sri_seq;drop table if exists %s;\n' % (
                self.table_name
            ) + 'create table %s (tid bigint, bdate date, aid int, delta int, mtime timestamp) with (%s)' % (
                self.table_name, self.sql_suffix)
        if self.distributed_randomly:
            cmd = cmd + 'distributed randomly '
        else:
            cmd = cmd + 'distributed by (tid) '
        if self.partitions == 0 or self.partitions is None:
            partition_query = ''
        else:
            with open(self.workload_directory + os.sep + 'partition.tpl',
                      'r') as p:
                partition_query = p.read()
            partition_query = partition_query.replace('table_name',
                                                      self.table_name)
            partition_query = partition_query.replace('table_orientation',
                                                      self.orientation)
            if self.compression_type is None:
                partition_query = partition_query.replace(
                    ', compresstype=table_compresstype', '')
            else:
                partition_query = partition_query.replace(
                    'table_compresstype', str(self.compression_type))
            if self.compression_level is None or self.compression_level < 0:
                partition_query = partition_query.replace(
                    ', compresslevel=table_compresslevel', '')
            else:
                partition_query = partition_query.replace(
                    'table_compresslevel', str(self.compression_level))

        cmd = cmd + partition_query + ';'

        with open(self.tmp_folder + os.sep + 'sri_create.sql', 'w') as f:
            f.write(cmd)

        self.output(cmd)
        (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep +
                                    'sri_create.sql',
                                    dbname=self.database_name,
                                    flag='-t -A')
        self.output('\n'.join(result))

        self.output('-- Complete loading data')
Пример #7
0
    def load_data(self):

        if self.load_data_flag:
            beg_time = datetime.now()
            end_time = beg_time
            status = 'ERROE'
            if self.check_seeds() and self.prep_e_tables() and self.prep_udfs():
                status = 'SUCCESS'
                os.system(self.scripts_dir + os.sep + 'prep_GUCs.sh')

                # dca_demo_conf set the data scale 
                with open(self.scripts_dir + os.sep + 'dca_demo_conf.sql', 'r') as f:
                    sql = f.read()
                    scale = int (8500000000 / 10.4 / 1024 * self.scale_factor)
                    sql = sql.replace('8500000000', str(scale))
                with open(self.tmp_folder + os.sep + 'dca_demo_conf.sql', 'w') as f:
                    f.write(sql)
                
                scripts = ['prep_dimensions.sql', 'prep_facts.sql', 'gen_order_base.sql', 'gen_facts.sql']
                for script in scripts:
                    self.output('------ start %s ------' % (script))

                    with open(self.scripts_dir + os.sep + script, 'r') as f:
                        sql = f.read()
                    sql = sql.replace('PATH_OF_DCA_DEMO_CONF_SQL', '\i %s/dca_demo_conf.sql' % (self.tmp_folder))
                    sql = sql.replace('SQLSUFFIX', self.sql_suffix)
                    with open(self.tmp_folder + os.sep + script, 'w') as f:
                        f.write(sql)   
                    
                    (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + script, dbname = self.database_name, flag = '-e')
                    self.output('\n'.join(result))

                    if ok and str(result).find('ERROR') == -1: 
                        end_time = datetime.now()    
                    else:
                        status = 'ERROR'
                        self.output('%s is error!' % (script))
                        end_time = datetime.now()
                        break  
        
        else:
            status = 'SKIP'
            beg_time = datetime.now()
            end_time = beg_time
                
        duration = end_time - beg_time
        duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds /1000
        beg_time = str(beg_time).split('.')[0]
        end_time = str(end_time).split('.')[0]
        
        self.output('   Loading   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (1, 1, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);" 
            % (self.tr_id, self.s_id, 'retail_dw_' + self.tbl_suffix, 1, status, beg_time, end_time, duration))
Пример #8
0
    def prep_e_tables(self):
        (ok, result) = psql.runfile(ifile=self.scripts_dir + os.sep +
                                    'prep_database.sql',
                                    dbname=self.database_name)
        self.output('------ create schema ------\n' + '\n'.join(result))
        if not ok or str(result).find('ERROR') != -1:
            return False

        self.port = self.getOpenPort()
        hostname = socket.gethostname()

        with open(self.scripts_dir + os.sep + 'prep_external_tables.sql',
                  'r') as f:
            sql = f.read()
            sql = sql.replace('//HOST:PORT', '//%s:%s' % (hostname, self.port))
        with open(self.tmp_folder + os.sep + 'prep_external_tables.sql',
                  'w') as f:
            f.write(sql)

        self.output('------ prep_external_tables ------')
        (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep +
                                    'prep_external_tables.sql',
                                    dbname=self.database_name,
                                    flag='-e')
        self.output('/n'.join(result))

        if ok and str(result).find('ERROR') == -1:
            cmd = 'gpfdist -d %s -p %s -l %s/fdist.%s.log &' % (
                self.tmp_folder, self.port, self.tmp_folder, self.port)
            self.output(cmd)
            result = os.system(cmd)
            self.output(str(result))
            return True
        else:
            self.output('prep_external_tables error. ')
            return False
Пример #9
0
    def vacuum_analyze(self):
        self.output('-- Start vacuum analyze')     
        con_id = -1
        if self.continue_flag:
            if self.load_data_flag:
                #sql = 'VACUUM ANALYZE;'
                sql = 'ANALYZE;'
                self.output(sql)
                sql_filename = 'vacuum.sql'
                # get con_id
                sql_file = '%' + sql_filename + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (sql_filename , sql_file)
                
                with open(self.tmp_folder + os.sep + sql_filename, 'w') as f:
                    f.write(sql)
                    f.write(get_con_id_sql)

                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + sql_filename, dbname = self.database_name, flag = '-t -A') # , username = self.user,)
                end_time = datetime.now()
                self.output(result[0].split('***')[0])

                if ok and str(result).find('ERROR:') == -1 and str(result).find('FATAL:') == -1 and str(result).find('psql:'):
                    status = 'SUCCESS'
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
                    self.continue_flag = False
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time

        duration = end_time - beg_time
        duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds/1000
        beg_time = str(beg_time)
        end_time = str(end_time)
 
        self.output('   VACUUM ANALYZE   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (1, 1, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Vacuum_analyze', 'Vacuum_analyze', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" 
            % (self.tr_id, self.s_id, con_id, status, beg_time, end_time, duration, self.adj_s_id))
        
        self.output('-- Complete vacuum analyze')
Пример #10
0
    def vacuum_analyze(self):
        self.output('-- Start vacuum analyze')     
        con_id = -1
        if self.continue_flag:
            if self.load_data_flag:
                #sql = 'VACUUM ANALYZE;'
                sql = 'ANALYZE;'
                self.output(sql)
                sql_filename = 'vacuum.sql'
                # get con_id
                sql_file = '%' + sql_filename + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (sql_filename , sql_file)
                
                with open(self.tmp_folder + os.sep + sql_filename, 'w') as f:
                    f.write(sql)
                    f.write(get_con_id_sql)

                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + sql_filename, dbname = self.database_name, flag = '-t -A') # , username = self.user,)
                end_time = datetime.now()
                self.output(result[0].split('***')[0])

                if ok and str(result).find('ERROR:') == -1 and str(result).find('FATAL:') == -1 and str(result).find('psql:'):
                    status = 'SUCCESS'
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
                    self.continue_flag = False
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time

        duration = end_time - beg_time
        duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds/1000
        beg_time = str(beg_time)
        end_time = str(end_time)
 
        self.output('   VACUUM ANALYZE   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (1, 1, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Vacuum_analyze', 'Vacuum_analyze', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" 
            % (self.tr_id, self.s_id, con_id, status, beg_time, end_time, duration, self.adj_s_id))
        
        self.output('-- Complete vacuum analyze')
Пример #11
0
    def prep_udfs(self):
        # make
        box_muller = self.workload_directory + os.sep + 'box_muller'
        gphome = os.environ['GPHOME']

        cmd = 'cd %s;make clean;make install' % (box_muller)
        self.output(cmd)
        (status, output) = commands.getstatusoutput(cmd)
        if status != 0:
            self.output('error: ' + output)
            return False
        else:
            self.output('make success. ')

        host_list = os.popen(
            'psql -d %s -c \"SELECT hostname FROM pg_catalog.gp_segment_configuration GROUP BY hostname ORDER by hostname;\"'
            % (self.database_name)).readlines()
        lists = [
            i.strip() for i in host_list if 'hostname' not in i
            and '---' not in i and 'row' not in i and i != '\n'
        ]
        hostfile = ''
        for host in lists:
            hostfile = hostfile + '-h %s ' % (host)

        cmd = 'gpscp %s %s/bm.so =:%s/lib/postgresql/' % (hostfile, box_muller,
                                                          gphome)
        self.output(cmd)
        (status, output) = commands.getstatusoutput(cmd)
        if status != 0:
            self.output('error: ' + output)
            return False
        else:
            self.output('gpscp success. ')

        (ok, result) = psql.runfile(ifile=self.scripts_dir + os.sep +
                                    'prep_UDFs.sql',
                                    dbname=self.database_name)
        self.output('------ perp udfs ------\n' + '\n'.join(result))
        if not ok or str(result).find('ERROR') != -1:
            self.output('prep_UDFs error. ')
            return False
        else:
            self.output('prep_UDFs success. ')

        return True
Пример #12
0
    def load_data(self):
        self.output('-- generate data file: %s' % (self.fname))
        cmd = "dbgen -b %s -s 1 -T L > %s " % (self.dss, self.fname)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        if status != 0:
            print("generate data file %s error. " % (self.fname))
            sys.exit(2)
        self.output('generate data file successed. ')

        if self.load_data_flag == False:
            return
        self.output('-- Start loading data')
        super(Copy, self).load_data()

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output(
                'ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists'
                % (data_directory))
            return

        table_name = 'lineitem_copy'
        with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
            cmd = f.read()
            cmd = self.replace_sql(sql=cmd, table_name=table_name)

        with open(self.tmp_folder + os.sep + 'copy_create.sql', 'w') as f:
            f.write(cmd)
            if self.user != 'gpadmin':
                f.write('GRANT ALL ON TABLE %s TO %s;' %
                        (table_name, self.user))
        self.output(cmd)
        (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep +
                                    'copy_create.sql',
                                    dbname=self.database_name,
                                    flag='-t -A')
        self.output('\n'.join(result))
        self.output('-- Complete loading data')
Пример #13
0
    def load_data(self):
        if self.load_data_flag == False:
            return
        self.output('-- Start loading data')
        super(Sri,self).load_data()

        # create table and sequence
        if self.distributed_randomly:
            cmd = 'drop sequence sri_seg;create sequence sri_seq;drop table if exists %s;\n' % (self.table_name) + 'create table %s (tid bigint, bdate date, aid int, delta int, mtime timestamp) with (%s) distributed randomly' % (self.table_name, self.sql_suffix)
        else:
            cmd = 'drop sequence sri_seg;create sequence sri_seq;drop table if exists %s;\n' % (self.table_name) + 'create table %s (tid bigint, bdate date, aid int, delta int, mtime timestamp) with (%s) distributed by (tid)' % (self.table_name, self.sql_suffix)
        
        if self.partitions == 0 or self.partitions is None:
            partition_query = ''
        else:
            with open(self.workload_directory + os.sep + 'partition.tpl', 'r') as p:
                partition_query = p.read()
            partition_query = partition_query.replace('table_name', self.table_name)
            partition_query = partition_query.replace('table_orientation', self.orientation)
            if self.compression_type is None:
                partition_query = partition_query.replace(', compresstype=table_compresstype', '')
            else:
                partition_query = partition_query.replace('table_compresstype', str(self.compression_type))
            if self.compression_level is None or self.compression_level < 0:
                partition_query = partition_query.replace(', compresslevel=table_compresslevel', '')
            else:
                partition_query = partition_query.replace('table_compresslevel', str(self.compression_level))

        cmd = cmd + partition_query + ';'

        with open(self.tmp_folder + os.sep + 'sri_create.sql', 'w') as f:
            f.write(cmd)
        
        self.output(cmd)    
        (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + 'sri_create.sql', dbname = self.database_name, flag = '-t -A')
        self.output('\n'.join(result))
        
        self.output('-- Complete loading data')
Пример #14
0
    def prep_udfs(self):
        # make
        box_muller = self.workload_directory + os.sep + 'box_muller'
        gphome=os.environ['GPHOME']

        cmd = 'cd %s;make clean;make install' % (box_muller)
        self.output(cmd)
        (status, output) = commands.getstatusoutput(cmd)
        if status != 0:
            self.output('error: ' + output)
            return False
        else:
            self.output('make success. ')

        host_list = os.popen('psql -d %s -c \"SELECT hostname FROM pg_catalog.gp_segment_configuration GROUP BY hostname ORDER by hostname;\"'% (self.database_name)).readlines()
        lists = [i.strip() for i in host_list if 'hostname' not in i and '---' not in i and 'row' not in i and i !='\n']
        hostfile = ''
        for host in lists:
            hostfile = hostfile + '-h %s ' % (host)

        cmd = 'gpscp %s %s/bm.so =:%s/lib/postgresql/' % (hostfile, box_muller, gphome)
        self.output(cmd)
        (status, output) = commands.getstatusoutput(cmd)
        if status != 0:
            self.output('error: ' + output)
            return False
        else:
            self.output('gpscp success. ')

        (ok, result) = psql.runfile(ifile = self.scripts_dir + os.sep + 'prep_UDFs.sql', dbname = self.database_name)
        self.output('------ perp udfs ------\n' + '\n'.join(result))
        if not ok or str(result).find('ERROR') != -1:
            self.output('prep_UDFs error. ')
            return False
        else:
            self.output('prep_UDFs success. ')

        return True
Пример #15
0
    def load_data(self):
        self.output("-- Start loading data")

        # get the data dir
        data_directory = self.workload_directory + os.sep + "data"
        if not os.path.exists(data_directory):
            self.output("ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists" % (data_directory))
            sys.exit(2)

        if self.load_data_flag:
            cmd = "drop database if exists %s;" % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print "\n".join(output)
                sys.exit(2)
            self.output(cmd)
            self.output("\n".join(output))

            cmd = "create database %s;" % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd, username=self.user)
            if not ok:
                print cmd
                print "\n".join(output)
                sys.exit(2)
            self.output(cmd)
            self.output("\n".join(output))

        tables = ["nation", "region", "part", "supplier", "partsupp", "customer", "orders", "lineitem", "revenue"]
        for table_name in tables:
            if self.continue_flag:
                if self.load_data_flag:
                    with open(data_directory + os.sep + table_name + ".sql", "r") as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql=cmd, table_name=table_name)
                    with open(self.tmp_folder + os.sep + table_name + ".sql", "w") as f:
                        f.write(cmd)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(
                        ifile=self.tmp_folder + os.sep + table_name + ".sql",
                        dbname=self.database_name,
                        username=self.user,
                    )
                    end_time = datetime.now()
                    self.output("\n".join(result))

                    if ok and str(result).find("ERROR") == -1 and str(result).find("FATAL") == -1:
                        status = "SUCCESS"
                    else:
                        status = "ERROR"
                        self.continue_flag = False
                else:
                    status = "SKIP"
                    beg_time = datetime.now()
                    end_time = beg_time
            else:
                status = "ERROR"
                beg_time = datetime.now()
                end_time = beg_time

            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time).split(".")[0]
            end_time = str(end_time).split(".")[0]
            self.output(
                "   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d" % (table_name, 1, 1, status, duration)
            )
            self.report_sql(
                "INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);"
                % (self.tr_id, self.s_id, table_name, status, beg_time, end_time, duration)
            )

        self.output("-- Complete loading data")
Пример #16
0
    def load_data(self):
        self.output('-- Start loading data')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data' + '-' + self.test_type
        if not os.path.exists(data_directory):
            self.output(
                'ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists'
                % (data_directory))
            sys.exit(2)

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

        tables = [
            'nation', 'region', 'part', 'supplier', 'partsupp', 'customer',
            'orders', 'lineitem', 'revenue'
        ]

        if self.test_type == 'pxf':
            self.output(
                'Deleting writable external table director if it already exists'
            )
            os.system('sudo -u pxf hdfs dfs -rm -r %s' %
                      (self.pxf_writable_path))

        for table_name in tables:
            con_id = -1
            if self.continue_flag:
                if self.load_data_flag:
                    with open(data_directory + os.sep + table_name + '.sql',
                              'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql=cmd, table_name=table_name)

                    # get con_id use this query
                    unique_string1 = '%s_%s_' % (self.workload_name,
                                                 self.user) + table_name
                    unique_string2 = '%' + unique_string1 + '%'
                    get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (
                        unique_string1, unique_string2)

                    with open(self.tmp_folder + os.sep + table_name + '.sql',
                              'w') as f:
                        f.write(cmd)
                        f.write(get_con_id_sql)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(
                        ifile=self.tmp_folder + os.sep + table_name + '.sql',
                        dbname=self.database_name,
                        flag='-t -A')  #, username = self.user)
                    #(ok,result) = commands.getstatusoutput('psql -d %s -f %s -t -A' % (self.database_name, self.tmp_folder + os.sep + table_name + '.sql'))
                    end_time = datetime.now()
                    self.output(result[0].split('***')[0])
                    #self.output('\n'.join(result))

                    if ok and str(result).find('ERROR:') == -1 and str(
                            result).find('FATAL:') == -1 and (
                                str(result).find('INSERT 0') != -1
                                or str(result).find('CREATE VIEW') != -1):
                        status = 'SUCCESS'
                        con_id = int(
                            result[0].split('***')[1].split('|')[2].strip())
                    else:
                        status = 'ERROR'
                        self.continue_flag = False
                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time

            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time)
            end_time = str(end_time)
            self.output(
                '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                % (table_name, 1, 1, status, duration))
            self.report_sql(
                "INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                % (self.tr_id, self.s_id, con_id, table_name, status, beg_time,
                   end_time, duration, self.adj_s_id))

        self.output('-- Complete loading data')
Пример #17
0
    def load_data(self):
        self.output('-- Start loading data ')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output('ERROR: Cannot find DDL to create tables for Copy: %s does not exists' % (data_directory))
            return
        
        self.output('-- start gpfdist service')
        self.gpfdist_port = self._getOpenPort()

        cmd = "gpssh -h %s -e 'gpfdist -d %s -p %d -l ./gpfdist.log &'" % (self.host_name, self.tmp_folder, self.gpfdist_port)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        
        cmd = 'ps -ef | grep gpfdist'
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        
        self.output('-- generate data file: %s' % (self.fname))
        cmd = "dbgen -b %s -s %d -T L > %s " % (self.dss, self.scale_factor, self.fname)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        if status != 0:
            print("generate data file %s error. " % (self.fname))
            sys.exit(2)
        self.output('generate data file successed. ')

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            count = 0
            while(True):
                cmd = 'create database %s;' % (self.database_name)
                (ok, output) = psql.runcmd(cmd = cmd)
                if not ok:
                    count = count + 1
                    time.sleep(1)
                else:
                    self.output(cmd)
                    self.output('\n'.join(output))
                    if self.user != 'gpadmin':
                        cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (self.database_name, self.user)
                        (ok1, output1) = psql.runcmd(cmd = cmd1)
                        self.output(cmd1)
                        self.output('\n'.join(output1))
                    break
                if count == 10:
                    print cmd
                    print '\n'.join(output)
                    sys.exit(2)

        tables = ['lineitem_gpfdist']
        
        niteration = 1
        while niteration <= self.num_iteration:
            self.output('-- Start iteration %d' % (niteration))
            for table_name in tables:
                con_id = -1
                if self.load_data_flag or self.run_workload_flag:
                    with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql = cmd, table_name = table_name, num = niteration)

                    # get con_id use this query
                    unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                    unique_string2 = '%' + unique_string1 + '%'
                    get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
                    
                    with open(self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql', 'w') as f:
                        f.write(cmd)
                        f.write(get_con_id_sql)

                    self.output(cmd)    
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql', dbname = self.database_name, flag = '-t -A') #, username = self.user)
                    end_time = datetime.now()
                    self.output(result[0].split('***')[0])

                    if ok and str(result).find('ERROR') == -1 and str(result).find('FATAL') == -1 and str(result).find('INSERT 0') != -1: 
                        status = 'SUCCESS'
                        con_id = int(result[0].split('***')[1].split('|')[2].strip())
                    else:
                        status = 'ERROR'
                
                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time
                    
                duration = end_time - beg_time
                duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds /1000
                beg_time = str(beg_time).split('.')[0]
                end_time = str(end_time).split('.')[0]
                
                self.output('   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (table_name, niteration, 1, status, duration))
                self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" 
                    % (self.tr_id, self.s_id, con_id, table_name, niteration, status, beg_time, end_time, duration, self.adj_s_id))
                
            self.output('-- Complete iteration %d' % (niteration))
            niteration += 1
        
        if self.user != 'gpadmin':
            cmd1 = 'REVOKE ALL ON DATABASE %s FROM %s;' % (self.database_name, self.user)
            (ok1, output1) = psql.runcmd(cmd = cmd1)
            self.output(cmd1)
            self.output('\n'.join(output1))

        self.output('-- Complete loading data')      
Пример #18
0
    def run_one_query(self, iteration, stream, qf_name, query, query_type='hawq'):
        con_id = -1
        if self.continue_flag:
            if not self.run_workload_flag:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
            if query_type == 'hive':
                self.run_hive_query(iteration, stream, qf_name, query)
                return
            else:
                if gl.suffix and query_type == 'hawq':
                    query = query.replace('TABLESUFFIX', self.tbl_suffix)
                elif query_type == 'pxf':
                    query = query.replace('TABLESUFFIX', 'r_' + self.pxf_profile)
                else:
                    query = query.replace('_TABLESUFFIX', '')
                query = query.replace('SQLSUFFIX', self.sql_suffix)

                # get con_id use this query
                unique_string1 = '%s_%s_%d_%d_' % (self.workload_name, self.user, iteration, stream) + qf_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)

                with open(self.tmp_folder + os.sep + '%d_%d_' % (iteration, stream) + qf_name, 'w') as f:
                    f.write(query)
                    f.write(get_con_id_sql)

                self.output(query)
                query_file_path = self.tmp_folder + os.sep + '%d_%d_' % (iteration, stream) + qf_name
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile = query_file_path, dbname = self.database_name, username = self.user, flag = '-t -A')
                end_time = datetime.now()

                if ok and str(result).find('psql: FATAL:') == -1 and str(result).find('NOTICE:') == -1 and str(result).find('ERROR:') == -1 and str(result).find('server closed') == -1 :
                    status = 'SUCCESS'
                    # generate output and md5 file
                    with open(self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '_' + query_type + '.output', 'w') as f:
                        f.write(str(result[0].split('***')[0]))
                    with open(self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '_' + query_type +'.output', 'r') as f:
                        query_result = f.read()
                        md5code = hashlib.md5(query_result.encode('utf-8')).hexdigest()
                    with open(self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '_' + query_type +'.md5', 'w') as f:
                        f.write(md5code)

                    # check query result
                    if gl.check_result:
                        ans_file = self.ans_directory + os.sep + qf_name.split('.')[0] + '_' + query_type +'.ans'
                        md5_file = self.ans_directory + os.sep + qf_name.split('.')[0] + '_' + query_type +'.md5'
                        if os.path.exists(ans_file):
                            self.output('Check query result use ans file')
                            if not self.check_query_result(ans_file=ans_file, result_file=self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '_' + query_type +'.output'):
                                status = 'ERROR'
                        elif os.path.exists(md5_file):
                            self.output('Check query result use md5 file')
                            if not self.check_query_result(ans_file=md5_file, result_file=self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '_' + query_type +'.md5'):
                                status = 'ERROR'
                        else:
                            self.output('No answer file')
                            status = 'ERROR'
                         
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else: 
                    status = 'ERROR'
                    self.output('\n'.join(result))
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time
            
        duration = end_time - beg_time
        duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds/1000     
        beg_time = str(beg_time)
        end_time = str(end_time)
        self.output('   Execution=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (qf_name.replace('.sql', ''), iteration, stream, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Execution', '%s', %d, %d, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" 
            % (self.tr_id, self.s_id, con_id, qf_name.replace('.sql', ''), iteration, stream, status, beg_time, end_time, duration, self.adj_s_id))
Пример #19
0
    def load_generate_series_data(self, hawq_tables):
        hawq_data_directory = self.workload_directory + os.sep + 'data-gen-series-hawq'
        # run all sql in each loading data file
        for table_name in hawq_tables:
            con_id = -1

            with open(os.path.join(hawq_data_directory, table_name + '.sql'),
                      'r') as f:
                cmd = f.read()
            cmd = self.replace_sql(sql=cmd,
                                   table_name=table_name,
                                   query_type='hawq')
            with open(self.tmp_folder + os.sep + table_name + '.sql',
                      'w') as f:
                f.write(cmd)
            self.output(cmd)

            for batch_num in xrange(self.generate_data_batches_num):
                if self.continue_flag:
                    batch_start_date = (
                        datetime.strptime(str(self.generate_data_start_date),
                                          '%Y%m%d').date() +
                        timedelta(days=batch_num *
                                  ceil(self.generate_data_batch_size /
                                       float(60 * 60 * 24)))
                    ).strftime("%Y%m%d")
                    flags = '-t -A -v v_date_YYYYMMDD="\'%s\'" -v v_nb_rows="%s" ' % (
                        batch_start_date, self.generate_data_batch_size)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(ifile=self.tmp_folder +
                                                os.sep + table_name + '.sql',
                                                dbname=self.database_name,
                                                username='******',
                                                flag=flags)
                    end_time = datetime.now()
                    self.output(result[0].split('***')[0])
                    if ok and str(result).find('ERROR') == -1 and str(
                            result).find('FATAL') == -1 and str(result).find(
                                'INSERT 0') != -1:
                        status = 'SUCCESS'
                        con_id = -1
                    else:
                        status = 'ERROR'
                        beg_time = datetime.now()
                        end_time = beg_time
                        self.continue_flag = False
                else:
                    status = 'ERROR'
                    beg_time = datetime.now()
                    end_time = beg_time

                duration = end_time - beg_time
                duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
                beg_time = str(beg_time).split('.')[0]
                end_time = str(end_time).split('.')[0]

                self.output(
                    '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                    % (table_name, batch_num, 1, status, duration))
                self.report_sql(
                    "INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                    % (self.tr_id, self.s_id, con_id, table_name, status,
                       beg_time, end_time, duration, self.adj_s_id))
Пример #20
0
    def load_loading(self, tables):
        self.output('\n--Start gpfdist')
        self._start_gpfdist()
        cmd = "gpssh -f %s -e 'ps -ef | grep gpfdist | grep -v grep'" % (
            self.hostfile_seg)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(output)

        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output(
                'ERROR: Cannot find DDL to create tables for TPCDS: %s does not exists'
                % (data_directory))
            sys.exit(2)

        gpfdist_map = {}
        for item in tables:
            gpfdist_map[item] = []

        for cur_host in self.seg_hostname_list:
            cmd = "gpssh -h %s -e 'cat %s'" % (
                cur_host, os.path.join(self.tmp_tpcds_folder, 'dat_files.txt'))
            dat_file_suffix = '.dat'

            (status, output) = commands.getstatusoutput(cmd)
            if status != 0:
                print('Error happen in ls data dir in %s' % (cur_host))
                print(output)
                sys.exit(2)
            else:
                lines = output.split('\n')
                for line in lines:
                    if line.find(dat_file_suffix) != -1:
                        file_name = line.split(' ')[-1].strip()
                        tmp_name = file_name[:file_name.rindex('_')]
                        table_name = tmp_name[:tmp_name.rindex('_')]
                        if table_name not in gpfdist_map.keys():
                            if table_name.find('dbgen_version') == -1:
                                print('Error: %s not find in gpfdist_map' %
                                      (table_name))
                                sys.exit(2)
                        else:
                            gpfdist_map[table_name].append(
                                "'gpfdist://%s:%s/%s'" %
                                (cur_host, self.gpfdist_port, file_name))

        for table_name in tables:
            if len(gpfdist_map[table_name]) == 0:
                print(table_name +
                      ' has no data files, generate data wrong in workload ' +
                      self.workload_name)
            self.output(table_name + ':' + str(len(gpfdist_map[table_name])) +
                        ' data files')

        self.output('\n--Start loading data into tables')
        # run all sql in each loading data file
        for table_name in tables:
            con_id = -1
            if self.continue_flag:
                with open(os.path.join(data_directory, table_name + '.sql'),
                          'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql=cmd, table_name=table_name)
                location = "LOCATION(" + ','.join(
                    gpfdist_map[table_name]) + ")"
                cmd = cmd.replace('LOCATION', location)

                # get con_id use this query
                unique_string1 = '%s_%s_' % (self.workload_name,
                                             self.user) + table_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (
                    unique_string1, unique_string2)

                with open(self.tmp_folder + os.sep + table_name + '.sql',
                          'w') as f:
                    f.write(cmd)
                    f.write(get_con_id_sql)

                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep +
                                            table_name + '.sql',
                                            dbname=self.database_name,
                                            username='******',
                                            flag='-t -A')
                end_time = datetime.now()
                self.output(result[0].split('***')[0])

                if ok and str(result).find('ERROR') == -1 and str(result).find(
                        'FATAL') == -1 and str(result).find('INSERT 0') != -1:
                    status = 'SUCCESS'
                    con_id = int(
                        result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
                    beg_time = datetime.now()
                    end_time = beg_time
                    self.continue_flag = False
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time

            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time).split('.')[0]
            end_time = str(end_time).split('.')[0]

            self.output(
                '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                % (table_name, 1, 1, status, duration))
            self.report_sql(
                "INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                % (self.tr_id, self.s_id, con_id, table_name, status, beg_time,
                   end_time, duration, self.adj_s_id))
Пример #21
0
    def load_data(self):
        self.output('-- Start loading data')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output(
                'ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists'
                % (data_directory))
            sys.exit(2)

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd, username=self.user)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

        tables = [
            'nation', 'region', 'part', 'supplier', 'partsupp', 'customer',
            'orders', 'lineitem', 'revenue'
        ]
        for table_name in tables:
            if self.continue_flag:
                if self.load_data_flag:
                    with open(data_directory + os.sep + table_name + '.sql',
                              'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql=cmd, table_name=table_name)
                    with open(self.tmp_folder + os.sep + table_name + '.sql',
                              'w') as f:
                        f.write(cmd)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(ifile=self.tmp_folder +
                                                os.sep + table_name + '.sql',
                                                dbname=self.database_name,
                                                username=self.user)
                    end_time = datetime.now()
                    self.output('\n'.join(result))

                    if ok and str(result).find('ERROR') == -1 and str(
                            result).find('FATAL') == -1:
                        status = 'SUCCESS'
                    else:
                        status = 'ERROR'
                        self.continue_flag = False
                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time

            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time).split('.')[0]
            end_time = str(end_time).split('.')[0]
            self.output(
                '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                % (table_name, 1, 1, status, duration))
            self.report_sql(
                "INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);"
                % (self.tr_id, self.s_id, table_name, status, beg_time,
                   end_time, duration))

        self.output('-- Complete loading data')
Пример #22
0
    def run_one_query(self, iteration, stream, qf_name, query):
        con_id = -1
        if self.continue_flag:
            if self.run_workload_flag:
                if gl.suffix:
                    query = query.replace('TABLESUFFIX', self.tbl_suffix)
                else:
                    query = query.replace('_TABLESUFFIX', '')
                query = query.replace('SQLSUFFIX', self.sql_suffix)

                # get con_id use this query
                unique_string1 = '%s_%s_%d_%d_' % (self.workload_name, self.user, iteration, stream) + qf_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)

                with open(self.tmp_folder + os.sep + '%d_%d_' % (iteration, stream) + qf_name, 'w') as f:
                    f.write(query)
                    f.write(get_con_id_sql)

                self.output(query)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + '%d_%d_' % (iteration, stream) + qf_name, dbname = self.database_name, username = self.user, flag = '-t -A')
                end_time = datetime.now()

                if ok and str(result).find('psql: FATAL:') == -1 and str(result).find('ERROR:') == -1 and str(result).find('server closed') == -1 :
                    status = 'SUCCESS'
                    # generate output and md5 file
                    with open(self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '.output', 'w') as f:
                        f.write(str(result[0].split('***')[0]))
                    with open(self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '.output', 'r') as f:
                        query_result = f.read()
                        md5code = hashlib.md5(query_result.encode('utf-8')).hexdigest()
                    with open(self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '.md5', 'w') as f:
                        f.write(md5code)

                    # check query result
                    if gl.check_result:
                        ans_file = self.ans_directory + os.sep + qf_name.split('.')[0] + '.ans'
                        md5_file = self.ans_directory + os.sep + qf_name.split('.')[0] + '.md5'
                        if os.path.exists(ans_file):
                            self.output('Check query result use ans file')
                            if not self.check_query_result(ans_file = ans_file, result_file = self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '.output'):
                                status = 'ERROR'
                        elif os.path.exists(md5_file):
                            self.output('Check query result use md5 file')
                            if not self.check_query_result(ans_file = md5_file, result_file = self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '.md5'):
                                status = 'ERROR'
                        else:
                            self.output('No answer file')
                            status = 'ERROR'
                         
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
                    self.output('\n'.join(result))
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time
            
        duration = end_time - beg_time
        duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds/1000     
        beg_time = str(beg_time)
        end_time = str(end_time)
        self.output('   Execution=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (qf_name.replace('.sql', ''), iteration, stream, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Execution', '%s', %d, %d, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" 
            % (self.tr_id, self.s_id, con_id, qf_name.replace('.sql', ''), iteration, stream, status, beg_time, end_time, duration, self.adj_s_id))
Пример #23
0
    def load_data(self):
        self.output('-- Start loading data')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output('ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists' % (data_directory))
            sys.exit(2)

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd, username = '******')
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd, username = '******')
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

        tables = ['nation', 'region', 'part', 'supplier', 'partsupp', 'customer', 'orders','lineitem' ,'revenue']
        for table_name in tables:
            con_id = -1
            if self.continue_flag:
                if self.load_data_flag:
                    with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql = cmd, table_name = table_name)
                    
                    # get con_id use this query
                    unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                    unique_string2 = '%' + unique_string1 + '%'
                    get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)

                    with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
                        f.write(cmd)
                        f.write(get_con_id_sql)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + table_name + '.sql', dbname = self.database_name, flag = '-t -A') #, username = self.user)
                    #(ok,result) = commands.getstatusoutput('psql -d %s -f %s -t -A' % (self.database_name, self.tmp_folder + os.sep + table_name + '.sql'))
                    end_time = datetime.now()
                    self.output(result[0].split('***')[0])
                    #self.output('\n'.join(result))
                    
                    if ok and str(result).find('ERROR:') == -1 and str(result).find('FATAL:') == -1 and (str(result).find('INSERT 0') != -1 or str(result).find('CREATE VIEW') != -1):
                        status = 'SUCCESS'
                        con_id = int(result[0].split('***')[1].split('|')[2].strip())
                    else:
                        status = 'ERROR'
                        self.continue_flag = False
                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time
                
            duration = end_time - beg_time
            duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds /1000
            beg_time = str(beg_time)
            end_time = str(end_time)        
            self.output('   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (table_name, 1, 1, status, duration))
            self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" 
                % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id))
               
        self.output('-- Complete loading data')
Пример #24
0
    def load_data(self):

        if self.load_data_flag:
            beg_time = datetime.now()
            end_time = beg_time
            status = 'ERROE'
            if self.check_seeds() and self.prep_e_tables() and self.prep_udfs(
            ):
                status = 'SUCCESS'
                os.system(self.scripts_dir + os.sep + 'prep_GUCs.sh')

                # dca_demo_conf set the data scale
                with open(self.scripts_dir + os.sep + 'dca_demo_conf.sql',
                          'r') as f:
                    sql = f.read()
                    scale = int(8500000000 / 10.4 / 1024 * self.scale_factor)
                    sql = sql.replace('8500000000', str(scale))
                with open(self.tmp_folder + os.sep + 'dca_demo_conf.sql',
                          'w') as f:
                    f.write(sql)

                scripts = [
                    'prep_dimensions.sql', 'prep_facts.sql',
                    'gen_order_base.sql', 'gen_facts.sql'
                ]
                for script in scripts:
                    self.output('------ start %s ------' % (script))

                    with open(self.scripts_dir + os.sep + script, 'r') as f:
                        sql = f.read()
                    sql = sql.replace(
                        'PATH_OF_DCA_DEMO_CONF_SQL',
                        '\i %s/dca_demo_conf.sql' % (self.tmp_folder))
                    sql = sql.replace('SQLSUFFIX', self.sql_suffix)
                    with open(self.tmp_folder + os.sep + script, 'w') as f:
                        f.write(sql)

                    (ok, result) = psql.runfile(ifile=self.tmp_folder +
                                                os.sep + script,
                                                dbname=self.database_name,
                                                flag='-e')
                    self.output('\n'.join(result))

                    if ok and str(result).find('ERROR') == -1:
                        end_time = datetime.now()
                    else:
                        status = 'ERROR'
                        self.output('%s is error!' % (script))
                        end_time = datetime.now()
                        break

        else:
            status = 'SKIP'
            beg_time = datetime.now()
            end_time = beg_time

        duration = end_time - beg_time
        duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
        beg_time = str(beg_time).split('.')[0]
        end_time = str(end_time).split('.')[0]

        self.output(
            '   Loading   Iteration=%d   Stream=%d   Status=%s   Time=%d' %
            (1, 1, status, duration))
        self.report_sql(
            "INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);"
            % (self.tr_id, self.s_id, 'retail_dw_' + self.tbl_suffix, 1,
               status, beg_time, end_time, duration))
Пример #25
0
    def load_data(self):
        self.output('-- Start loading data ')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output(
                'ERROR: Cannot find DDL to create tables for Copy: %s does not exists'
                % (data_directory))
            return

        self.output('-- start gpfdist service')
        self.gpfdist_port = self._getOpenPort()

        cmd = "gpssh -h %s -e 'gpfdist -d %s -p %d -l ./gpfdist.log &'" % (
            self.host_name, self.tmp_folder, self.gpfdist_port)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)

        cmd = 'ps -ef | grep gpfdist'
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)

        self.output('-- generate data file: %s' % (self.fname))
        cmd = "dbgen -b %s -s %d -T L > %s " % (self.dss, self.scale_factor,
                                                self.fname)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        if status != 0:
            print("generate data file %s error. " % (self.fname))
            sys.exit(2)
        self.output('generate data file successed. ')

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            count = 0
            while (True):
                cmd = 'create database %s;' % (self.database_name)
                (ok, output) = psql.runcmd(cmd=cmd)
                if not ok:
                    count = count + 1
                    time.sleep(1)
                else:
                    self.output(cmd)
                    self.output('\n'.join(output))
                    if self.user != 'gpadmin':
                        cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (
                            self.database_name, self.user)
                        (ok1, output1) = psql.runcmd(cmd=cmd1)
                        self.output(cmd1)
                        self.output('\n'.join(output1))
                    break
                if count == 10:
                    print cmd
                    print '\n'.join(output)
                    sys.exit(2)

        tables = ['lineitem_gpfdist']

        niteration = 1
        while niteration <= self.num_iteration:
            self.output('-- Start iteration %d' % (niteration))
            for table_name in tables:
                con_id = -1
                if self.load_data_flag or self.run_workload_flag:
                    with open(data_directory + os.sep + table_name + '.sql',
                              'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql=cmd,
                                           table_name=table_name,
                                           num=niteration)

                    # get con_id use this query
                    unique_string1 = '%s_%s_' % (self.workload_name,
                                                 self.user) + table_name
                    unique_string2 = '%' + unique_string1 + '%'
                    get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (
                        unique_string1, unique_string2)

                    with open(
                            self.tmp_folder + os.sep +
                            'gpfdist_loading_temp.sql', 'w') as f:
                        f.write(cmd)
                        f.write(get_con_id_sql)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(
                        ifile=self.tmp_folder + os.sep +
                        'gpfdist_loading_temp.sql',
                        dbname=self.database_name,
                        flag='-t -A')  #, username = self.user)
                    end_time = datetime.now()
                    self.output(result[0].split('***')[0])

                    if ok and str(result).find('ERROR') == -1 and str(
                            result).find('FATAL') == -1 and str(result).find(
                                'INSERT 0') != -1:
                        status = 'SUCCESS'
                        con_id = int(
                            result[0].split('***')[1].split('|')[2].strip())
                    else:
                        status = 'ERROR'

                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time

                duration = end_time - beg_time
                duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
                beg_time = str(beg_time).split('.')[0]
                end_time = str(end_time).split('.')[0]

                self.output(
                    '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                    % (table_name, niteration, 1, status, duration))
                self.report_sql(
                    "INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                    % (self.tr_id, self.s_id, con_id, table_name, niteration,
                       status, beg_time, end_time, duration, self.adj_s_id))

            self.output('-- Complete iteration %d' % (niteration))
            niteration += 1

        if self.user != 'gpadmin':
            cmd1 = 'REVOKE ALL ON DATABASE %s FROM %s;' % (self.database_name,
                                                           self.user)
            (ok1, output1) = psql.runcmd(cmd=cmd1)
            self.output(cmd1)
            self.output('\n'.join(output1))

        self.output('-- Complete loading data')
Пример #26
0
    def load_loading(self, tables):
        self.output('\n--Start gpfdist')
        self._start_gpfdist()
        cmd = "gpssh -f %s -e 'ps -ef | grep gpfdist | grep -v grep'" % (self.hostfile_seg)   
        (status, output) = commands.getstatusoutput(cmd)
        self.output(output)

        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output('ERROR: Cannot find DDL to create tables for TPCDS: %s does not exists' % (data_directory))
            sys.exit(2)

        gpfdist_map = {}
        for item in tables:
            gpfdist_map[item] = []
        
        for cur_host in self.seg_hostname_list:
            cmd = "gpssh -h %s -e 'cat %s'" % (cur_host, os.path.join(self.tmp_tpcds_folder, 'dat_files.txt'))
            dat_file_suffix = '.dat'
            
            (status, output) = commands.getstatusoutput(cmd)
            if status != 0:
                print('Error happen in ls data dir in %s' % (cur_host))
                print(output)
                sys.exit(2)
            else:
                lines = output.split('\n')
                for line in lines:
                    if line.find(dat_file_suffix) != -1:
                        file_name = line.split(' ')[-1].strip()
                        tmp_name = file_name[:file_name.rindex('_')]
                        table_name = tmp_name[:tmp_name.rindex('_')]
                        if table_name not in gpfdist_map.keys():
                            if table_name.find('dbgen_version') == -1:
                                print('Error: %s not find in gpfdist_map' % (table_name))
                                sys.exit(2)
                        else:
                            gpfdist_map[table_name].append("'gpfdist://%s:%s/%s'" % (cur_host, self.gpfdist_port, file_name))

        for table_name in tables:
            if len(gpfdist_map[table_name]) == 0:
                print(table_name + ' has no data files, generate data wrong in workload ' + self.workload_name)
            self.output(table_name + ':' + str(len(gpfdist_map[table_name])) + ' data files')
        
        self.output('\n--Start loading data into tables')
        # run all sql in each loading data file
        for table_name in tables:
            con_id = -1
            if self.continue_flag:
                with open(os.path.join(data_directory, table_name + '.sql'), 'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql = cmd, table_name = table_name)
                location = "LOCATION(" + ','.join(gpfdist_map[table_name]) + ")"
                cmd = cmd.replace('LOCATION', location)

                # get con_id use this query
                unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)

                with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
                    f.write(cmd)
                    f.write(get_con_id_sql)

                self.output(cmd)    
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + table_name + '.sql', dbname = self.database_name, username = '******', flag = '-t -A')
                end_time = datetime.now()
                self.output(result[0].split('***')[0])
                
                if ok and str(result).find('ERROR') == -1 and str(result).find('FATAL') == -1 and str(result).find('INSERT 0') != -1: 
                    status = 'SUCCESS'
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
                    beg_time = datetime.now()
                    end_time = beg_time
                    self.continue_flag = False
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time

            duration = end_time - beg_time
            duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds /1000
            beg_time = str(beg_time).split('.')[0]
            end_time = str(end_time).split('.')[0]
            
            self.output('   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (table_name, 1, 1, status, duration))
            self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" 
                % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id))