Example #1
 def load_table(self):
     print("Load dim table start.")
     tbls = local_config()['tpcds_tbls']
     thread_list = []
     for tbl in tbls.keys():
         if tbl in local_config()['partition_tbls']:
             continue
         print("Loading table %s..."%(tbl))
         tbl_name = tbl
         et_tbl_name = tbls[tbl][0]
         create_tbl = """
                 use %s;
                 drop table if exists %s;
                 create table %s like %s stored as %s;
                 set COMPRESSION_CODEC= %s;
                 insert overwrite table %s select * from %s;
         """%(self.db_name,tbl_name,tbl_name,et_tbl_name,self.tbl_format,self.tbl_compress_codec,tbl_name,et_tbl_name)
         cmd = "impala-shell -i %s:21000  <<EOF %s \n EOF"%(local_config()['impalad_nodes'][0],create_tbl)
         t = threading.Thread(target=os.system,args=[cmd])
         thread_list.append(t)
         t.start()
     for t in thread_list:
         if t.is_alive():
             t.join()
     print("Load completed")
def gen_partition_tbl(tbl,start_thread_id,total_threads_num):
    tpcds_tool_root = local_config()['tpcds_tool_root']
    thread_count = local_config()['dsdgen_threads_per_node']
    thread_list = []
    for thread_i in range(start_thread_id,start_thread_id + thread_count):
        cmd = "%s \
                         -TABLE %s \
                         -SCALE %s \
                         -CHILD %s \
                         -PARALLEL %s \
                         -DISTRIBUTIONS %s \
                         -TERMINATE N \
                         -FILTER Y \
                         -QUIET Y | hdfs dfs -put - %s.dat\
               "%(os.path.join(tpcds_tool_root,'tools','dsdgen'),
                                tbl,
                                local_config()['tpcds_scale_factor'],
                                thread_i,
                                total_threads_num,
                                os.path.join(tpcds_tool_root,'tools','tpcds.idx'),
                                os.path.join(local_config()['flatfile_path_HDFS'],tbl,'_'.join([tbl,str(thread_i),str(total_threads_num)])))
        t = threading.Thread(target = os.system,args=[cmd])
        thread_list.append(t)
        t.start()
    for t in thread_list:
        if t.is_alive():
            t.join()
    print('Generated')
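
gen_partition_tbl runs once per host; gen_facts() (Example #15 below) invokes it over ssh as "python gen_partition_table.py <tbl> <start_thread_id> <total_threads_num>". A plausible entry point matching that invocation, assuming this function lives in gen_partition_table.py:

# Hypothetical entry point for gen_partition_table.py, matching the ssh
# invocation used by gen_facts() in Example #15 on this page.
if __name__ == '__main__':
    import sys
    tbl = sys.argv[1]
    start_thread_id = int(sys.argv[2])
    total_threads_num = int(sys.argv[3])
    gen_partition_tbl(tbl, start_thread_id, total_threads_num)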
Example #3
 def create_partition_tbl(self):
     for p_tbl in local_config()['partition_tbls']:
         tbl_name = p_tbl
         et_tbl_name = local_config()['tpcds_tbls'][p_tbl][0]
         #create partition tbl
         cmd = "impala-shell -i %s:21000 -d %s <<EOF %s \n EOF"%(local_config()['impalad_nodes'][0],self.db_name,self.partition_tbls[p_tbl])
         os.system(cmd)
Example #4
def push():
    hosts = list(local_config()['master'])   # copy, so extend() does not mutate the list inside the config
    hosts.extend(local_config()['impalad_nodes'])
    cur_path = os.path.dirname(os.path.realpath(__file__))
    for host in hosts:
        #mkdir
        os.system("ssh %s mkdir -p %s"%(host,local_config()['work_dir']))
        os.system('scp -r %s/* %s:%s'%(cur_path,host,local_config()['work_dir']))
    return hosts
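
push() returns the full host list, which the data-generation step consumes. A plausible driver sequence (assumed; the source does not show the caller):

# Assumed driver sequence, not shown in the source:
hosts = push()      # copy the tool tree to every node and collect the host list
gen_facts(hosts)    # fan flat-file generation out across those hosts (Example #15)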
Example #5
    def __init__(self):
        self.db_format = local_config()['db_format']
        self.tbl_format = local_config()['tbl_format_dic'][self.db_format[0]]
        self.tbl_compress_codec = local_config()['tbl_compress_codec_dic'][self.db_format[1]]
        self.db_name = local_config()['db_name']
        with open(os.path.join(local_config()['tpcds_tool_root'],'distinct-ss-sold-date.txt'), 'r') as f:
            self.ss_sold_dates = sorted(d.strip() for d in f)

        self.init_tbl()
def StoreStockPriceInfo(market,Stock_List,queue):
    with open(os.path.join(local_config()['install_path'],'output.csv'),'w',encoding='utf-8') as fp_w:
        consumer_num = local_config()['consumer_num']
        lock = Lock()
        consumer_proc_list = []
        for i in range(consumer_num):
            each_job = StoreStockPriceInfo_job(market)
            consumer_proc = Thread(target=each_job.StorePriceInfo, args=[queue,lock,fp_w])
            consumer_proc.start()
            consumer_proc_list.append(consumer_proc)
        for proc in consumer_proc_list:
            if proc.is_alive():
                proc.join()
def main():
    #program log path
    datetime_now = str(datetime.datetime.now().strftime("DATE%Y-%m-%d_TIME%H-%M"))
    #datetime_now = str("DATE2015-12-21_TIME13-24")
    start_timestamp = time.mktime(time.strptime(datetime_now,"DATE%Y-%m-%d_TIME%H-%M"))
    log_dir = './queries/logs/%s'%(datetime_now)
    
    if not os.path.isdir(log_dir):
        os.system('mkdir -p %s'%(log_dir))

    start = time.time()
    with open("%s/renderman.log"%(log_dir),'w') as logging:
        for user_number in local_config()['user_number']:
            start_timestamp = time.mktime(time.strptime(datetime.datetime.now().strftime("DATE%Y-%m-%d_TIME%H-%M"),"DATE%Y-%m-%d_TIME%H-%M"))
            #init & start consumer
            base_log_dir = os.path.join(log_dir,''.join(['user_number=',str(user_number)]))
            user_list = []
            clear_cache()

            for i in range(user_number):   # range, for Python 3 compatibility
                u = Process(target=distinct_user.user, args=[i,base_log_dir,logging])
                user_list.append(u)
            for user in user_list:
                user.start()
            for user in user_list:
                user.join()   # join() instead of the original busy-wait polling loop
            #collect logs
            ##remote_log
            print("Preparing remote logs")
            for desired_log in local_config()['desired_logs']['remote']:
                cmd_list = map(lambda x : 'ssh %s "python /home/wangdewei/extractLog.py %s %s"'%(x,str(start_timestamp),desired_log),local_config()['impalad_nodes'])
                res = [os.system(cmd) for cmd in cmd_list]
            #  scp
            scp_list = map(lambda x: 'scp %s:/home/wangdewei/extractLog/* %s'%(x,base_log_dir),local_config()['impalad_nodes'])
            res = [os.system(scp) for scp in scp_list]
            ##local
            print("Preparing local logs")
            for desired_log in local_config()['desired_logs']['local']:
                cmd = 'python extractLog.py %s %s %s'%(desired_log,str(start_timestamp),base_log_dir)
                print(cmd)
         #       os.system(cmd)

    collect_result(log_dir)
    end = time.time()
    duration = end - start
    print('run in %f seconds'%(duration))
 def run_testing(self):
     conn = connect(host='172.168.0.24', port=21050,database = 'tpcds_text_150')
     cursor = conn.cursor()
     count = 0
     cur_task = 0
     query_time = []
     with open(os.path.join(self.profile_path,'RESULT'),'w') as fp_w:
         for query in self.query_list:
             if query not in self.des_dic.keys():
                  self.des_dic[query] = {}
             for cpufreq in self.cpufreq_range:
                 if cpufreq not in self.des_dic[query].keys():
                     self.CPUFreqSet_OBJ.set(cpufreq)
                     self.des_dic[query][cpufreq] = []
                 start_time = time.time()
                 #for i in range(self.every_query_times):
                 
                 with open(os.path.join(local_config()['query_dir'],query),'r') as fp:
                     # str.strip() removes a character *set*, so the original
                     # strip('profile;\n') could also eat letters off the ends of
                     # the query; remove the exact 'profile;' prefix instead.
                     sql = fp.read().strip()
                     if sql.startswith('profile;'):
                         sql = sql[len('profile;'):]
                     sql = sql.strip('; ')
                 while True:
                     query_start_time = time.time()
                     cursor.execute(sql)
                     end_time = time.time()
                     query_time.append(end_time - query_start_time)
                     # drain the result set
                     while cursor.fetchone():
                         pass
                     
                     cur_profile = cursor.get_profile()
                     
                     count = count + 1
                     
                     with open(os.path.join(self.profile_path,str(count)+'.log'),'w') as fp_profile:
                         fp_profile.write("%s"%(cur_profile))
                     
                     if end_time - start_time > local_config()['duration_time'] :
                         break
                     
                 self.des_dic[query][cpufreq].sort()
         print "%s,%s,%s"%(self.users_InTotal,self.user_name,count)
         #print >>self.logging,"%s,%s,%s"%(self.profile_path,self.user_name,count)
         fp_w.write("%s,%s,%s,%s"%(self.users_InTotal,self.user_name,count, query_time))
     cursor.close()
     conn.close()
     return self.des_dic
Example #9
def get_tuple_task_dic():
    task_dic = {}
    if os.path.isdir(local_config()['impaladlog_path']):
        for root,dirs,files in os.walk(local_config()['impaladlog_path']):
            for file in files:
                if file.endswith('.hehe024.impaladlog'):
                    query_name = file.split('.')[0]
                    if query_name not in task_dic.keys():
                        task_dic[query_name] = [os.path.join(root,file),'tuple_descriptor_'+query_name+'.xml']                    
    elif os.path.isfile(local_config()['impaladlog_path']):
        file = os.path.split(local_config()['impaladlog_path'])[-1]
        query_name = file.split('.')[0]
        if query_name not in task_dic.keys():
            task_dic[query_name] = [local_config()['impaladlog_path'],'tuple_descriptor_'+query_name+'.xml']
    return task_dic
Example #10
def get_task_dic():
    task_dic = {}
    if os.path.isdir(local_config()['profile_path']):
        for root,dirs,files in os.walk(local_config()['profile_path']):
            for file in files:
                if file.endswith('.sql.log'):
                    query_name = file.split('.')[0]
                    if query_name not in task_dic.keys():
                        task_dic[query_name] = [os.path.join(root,file),query_name+'.sql.xml']                    
    elif os.path.isfile(local_config()['profile_path']):
        file = os.path.split(local_config()['profile_path'])[-1]
        query_name = file.split('.')[0]
        if query_name not in task_dic.keys():
            task_dic[query_name] = [local_config()['profile_path'],query_name+'.sql.xml']
    return task_dic
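
get_task_dic() maps each query name to [profile log path, output XML name]; the __main__ block in Example #22 below consumes it. A short usage sketch along those lines:

# Usage sketch, mirroring the __main__ block in Example #22:
task_dic = get_task_dic()
for task in task_dic:
    log_path, xml_name = task_dic[task]
    print(log_path, '->', xml_name)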
Example #11
def hdfs_mdir():
    cmd = "hdfs dfs -mkdir -p %s"
    root_dir = local_config()['flatfile_path_HDFS']
    overwrite = local_config()['overwrite']
    with os.popen('hdfs dfs -test -d %s;echo $?'%(root_dir)) as fp:
        tmp = fp.read().strip('\n')
        if tmp == '0':
            if not overwrite:
                # directory already exists, no need to generate data
                print('%s already exists. There is no need to generate data'%(root_dir))
                return False
            else:
                os.system('hdfs dfs -rm -r -f %s'%(root_dir))
    dirs = list(map(lambda x :os.path.join(root_dir,x) ,local_config()['tpcds_tbls'].keys()))
    for hdfs_dir in dirs:
        print("Making directory:%s"%(hdfs_dir))
        os.system(cmd%(hdfs_dir))
    return True
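
hdfs_mdir() returns False when the target HDFS directory already holds data and overwrite is off, so a caller can presumably gate generation on its return value:

# Assumed caller, gating generation on the return value:
if hdfs_mdir():
    gen_dims()   # defined in Example #14 below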
 def StorePriceInfo(self,queue,lock,output_media):
     output_type = local_config()['output_type']
     while True:
         with lock:   # hold the lock only while draining the queue
             if output_type == 'csv':
                 while not queue.empty():
                     temp = queue.get() + str(queue.qsize()) + '\n'
                     output_media.write(temp)
         time.sleep(2)   # sleep outside the lock so other consumers are not starved
 def __init__(self, log_file,xml_filename,logging):
     self.logging = logging
     self.xml_filename = xml_filename
     with open(log_file,'r') as fp:
         self.logfile = fp.read()
         self.part_impalad = (0, self.logfile.find('+----------+'))
         self.part_queryResult = (self.part_impalad[1], self.logfile.find('Query Runtime Profile', self.part_impalad[1] + 1))
         self.part_queryRuntime = (self.part_queryResult[1], self.logfile.find('----------------', self.part_queryResult[1] + 1))
         self.part_planFragment = (self.part_queryRuntime[1], self.logfile.find('----------------', self.part_queryRuntime[1] + 1))
         self.part_eachnodecosts = (self.part_planFragment[1], self.logfile.find('    Coordinator Fragment', self.part_planFragment[1] + 1))
         self.part_plannode = (self.part_eachnodecosts[1], self.logfile.find('\n    Fragment F',self.part_eachnodecosts[1]+1))
         self.part_instances = (self.part_plannode[1],len(self.logfile))
     #1.plan.xml
     self.obj_xml = xml_writers(xml_file = os.path.join(local_config()['install_dir'],'output_dir',(self.xml_filename)),query_name = xml_filename[0:-4])
     self.dom = self.obj_xml.create_dom()
     self.obj_node = xml_node(self.dom)
     
     #2.hbase.dist
     self.hbase_dist_file = os.path.join(local_config()['install_dir'],'output_dir',(self.xml_filename[0:-3]+'hbase.dist'))
Example #14
def gen_dims():
    root_dir = local_config()['flatfile_path_HDFS']
    proc_list = []
    print('Generating table flat file!')
    for tbl in local_config()['tpcds_tbls'].keys():
        if tbl in local_config()['partition_tbls']:
            continue
        print('%s is generating......'%(tbl))
        cmd = "%s \
                -TABLE %s \
                -SCALE %s \
                -DISTRIBUTIONS %s \
                -TERMINATE N \
                -FILTER Y \
                -QUIET Y |hdfs dfs -put - %s.dat \
            "%(os.path.join(local_config()['tpcds_tool_root'],'tools','dsdgen'),
                tbl,
                local_config()['tpcds_scale_factor'],
                os.path.join(local_config()['tpcds_tool_root'],'tools','tpcds.idx'),
                os.path.join(root_dir,tbl,tbl))
        proc = Process(target=os.system,args=[cmd])
        proc_list.append(proc)
        proc.start()
    for proc in proc_list:
        if proc.is_alive():
            proc.join()
    print('Generated')
Example #15
def gen_facts(hosts):
    tbls = local_config()['partition_tbls']
    tpcds_tool_root = local_config()['tpcds_tool_root']
    print('Partitioned table is generating flat file!')
    for tbl in tbls:
        print('%s is generating......'%(tbl))
        proc_list = []
        thread_count = local_config()['dsdgen_threads_per_node']
        total_threads_num = len(hosts) * thread_count
        for i,host in enumerate(hosts):
            start_thread_id = i*thread_count + 1
            cmd = "ssh %s 'cd %s;python gen_partition_table.py %s %s %s'"%(host,           
                                                                            local_config()['work_dir'],
                                                                            tbl,
                                                                            str(start_thread_id),
                                                                            str(total_threads_num))
            proc = Process(target = os.system,args=[cmd])
            proc_list.append(proc)
            proc.start()
        for proc in proc_list:
            if proc.is_alive():
                proc.join()
def user(user_name,baselog_dir,logging):
    #ex_wb = excel_writer(datetime_now + '.xlsx')
    log_dir = os.path.join(baselog_dir,str(user_name))
    if not os.path.isdir(log_dir):
        os.system("mkdir -p %s"%(log_dir))

    for test_case in local_config()['test_case_list']:
        if test_case not in testing.TestingClassDict.keys():
            print(testing.TestingClassDict)
            print('%s has not been completed...'%(test_case))
        else:
            test_obj = testing.TestingClassDict[test_case](logging,log_dir,user_name)
            test_obj.run_testing()
 def __init__(self,logging,log_dir,user_name):
     self.impalad_nodes = local_config()['impalad_nodes']
     self.cpufreq_range = local_config()['CPUFreq_range']
     self.query_list = local_config()['query_list']
     self.query_dir = local_config()['query_dir']
     self.impala_server = local_config()['IMPALA_SERVER']
     self.database_name = local_config()['DATABASE_NAME']
     self.sh_path = os.path.join(local_config()['install_path'],'queries','runsql.sh')
     self.profile_path = log_dir 
     self.network = local_config()['net_work']
     self.des_dic = {}
     self.CPUFreqSet_OBJ = CPUFreqSet()
     self.check_sys_state_obj = check_sys_state()
     self.logging = logging
     self.user_name = user_name
     self.users_InTotal = os.path.split(os.path.split(self.profile_path)[0])[1]
def GetStockPriceInfo(market,Stock_List,queue):
    step_size = local_config()['step_size']
    partitions = [Stock_List[i:i + step_size]
                  for i in range(0, len(Stock_List), step_size)]
    proc_list = []
    lock = Lock()
    for part in partitions:
        all_url = ''.join([str_i + ',' for str_i in part])
        each_job = GetStockpriceInfo_job(market,all_url)
        proc = Thread(target = each_job.GetPriceInfo,args=[queue,lock])
        proc.start()
        proc_list.append(proc)
    
    for proc in proc_list:
        if proc.is_alive():
            proc.join()
Example #19
 def load_partition_tbl_store_sales(self):
     num_part_per_query = 200
     partition_ranges = [self.ss_sold_dates[i: i + num_part_per_query] for i in range(0, len(self.ss_sold_dates), num_part_per_query)]
     assert sum([len(r) for r in partition_ranges]) == len(self.ss_sold_dates)
     thread_list = []
     for partition_range in partition_ranges:
         query = """
                 insert overwrite table store_sales
                 partition(ss_sold_date_sk) [shuffle]
                 select
                 ss_sold_time_sk,
                 ss_item_sk,
                 ss_customer_sk,
                 ss_cdemo_sk,
                 ss_hdemo_sk,
                 ss_addr_sk,
                 ss_store_sk,
                 ss_promo_sk,
                 ss_ticket_number,
                 ss_quantity,
                 ss_wholesale_cost,
                 ss_list_price,
                 ss_sales_price,
                 ss_ext_discount_amt,
                 ss_ext_sales_price,
                 ss_ext_wholesale_cost,
                 ss_ext_list_price,
                 ss_ext_tax,
                 ss_coupon_amt,
                 ss_net_paid,
                 ss_net_paid_inc_tax,
                 ss_net_profit,
                 ss_sold_date_sk
                 from et_store_sales
                 where ss_sold_date_sk
                 between {0} and {1};""".format(partition_range[0], partition_range[-1])
         cmd = "impala-shell -i %s:21000 -d %s <<EOF %s \nEOF"%(local_config()['impalad_nodes'][0],self.db_name,query)
         print(cmd)
         t = threading.Thread(target=os.system,args=[cmd])
         thread_list.append(t)
         t.start()
     for t in thread_list:
         if t.is_alive():
             t.join()
    def __init__(self,pl_i,plnode,logging,LR):
        self.attri_dic = {}
        self.logging = logging
        self.attri_dic['lr'] = LR
        with_scan_hdfs = False
        if pl_i == 0 :
            self.attri_dic['is_plan_root'] = 'true'
        else:
            self.attri_dic['is_plan_root'] = 'false'
        for line in plnode.split('\n'):
            line = line.strip()
            line = line.strip('|')
            line = line.strip('--')
            line = line.strip()
            if re.search(r'\d+:',line) and 'nid' not in self.attri_dic.keys():
                line_key,line_val = line.split(':')
                self.attri_dic['nid'] = str(int(line_key))
                node_type,node_type_attri = line_val.split(' [') if ' [' in line_val else (line_val,None)
                self.attri_dic['display_name'] = node_type
                if node_type_attri:
                    node_type_attri = node_type_attri.strip(']')
                    if re.match('HASH JOIN',node_type):
                        join_type,dist_type = node_type_attri.split(', ')
                        self.attri_dic['join_type'] = join_type
                        self.ana_Distributiontype(dist_type)
                    elif re.match('SCAN HDFS', node_type):
                        scan_table,dist_type = node_type_attri.split(', ')
                        scan_db,scan_tbl = scan_table.split('.')
                        if ' ' in scan_db:
                            scan_db = scan_db.split(' ')[0]
                        if ' ' in scan_tbl:
                            scan_tbl = scan_tbl.split(' ')[0]
                        self.attri_dic['database_name'] = scan_db
                        self.attri_dic['table'] = scan_tbl
                        
                        #self.ana_Distributiontype(dist_type)
                        if local_config()['partitioned_table_name'] in node_type_attri:
                            with_scan_hdfs = True
                    elif re.match('SCAN HBASE', node_type):
                        scan_table = node_type_attri
                        scan_db,scan_tbl = scan_table.split('.')
                        if ' ' in scan_db:
                            scan_db = scan_db.split(' ')[0]
                        if ' ' in scan_tbl:
                            scan_tbl = scan_tbl.split(' ')[0]
                        self.attri_dic['database_name'] = scan_db
                        self.attri_dic['table'] = scan_tbl
                        
#                         #self.ana_Distributiontype(dist_type)
#                         if local_config()['partitioned_table_name'] in node_type_attri:
#                             with_scan_hdfs = True
                    elif re.match('TOP-N', node_type):
                        self.attri_dic['limit'] = str(int(node_type_attri.split('=')[1]))
                    else:    
                        self.ana_Distributiontype(node_type_attri)
            # a MERGE-EXCHANGE node may also carry a limit attribute
            m = re.match(r'limit:\s\d+',line)
            if m:
                self.attri_dic['limit'] = str(int(line.split(': ')[1]))
            if self.attri_dic.get('display_name') == 'SCAN HDFS':   # .get(): display_name may not be set yet
                m = re.match(r'partitions=\d+/\d+',line)
                if m:
                    self.attri_dic['partitions'] = str(m.group(0).split('=')[1])
            # data percentage: only needs to run once, to obtain the data_percentage attribute
            if with_scan_hdfs:
                self.collect_data_percentage(lines=plnode.split('\n'),total_table_size_in_bytes = local_config()['total_table_size_in_bytes'])
                with_scan_hdfs = False
                           
            if 'tuple-ids' in line and 'row-size' in line and 'cardinality' in line:
                tuple_ids, row_size, cardinality = line.split(' ')
                self.attri_dic['tuple-ids'] = tuple_ids.split('=')[1]
                self.attri_dic['row-size'] = str(int(row_size.split('=')[1][:-1]))
Example #21
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from config import prod_config,local_config,checkENV


DB_URL=''
if checkENV()=='LOCAL':
    # local environment
    lc=local_config()
    DB_URL=lc.DB_URI
    print('database.py called in Local environment '+str(DB_URL))

else:
    pc=prod_config()
    DB_URL=pc.DB_URI

    print('database.py called in Prod environment URL->'+str(DB_URL))


engine = create_engine(DB_URL, echo=False)

Session = sessionmaker(autocommit=False,autoflush=False,bind=engine)
Base=declarative_base()
metadata = Base.metadata


def addUser(name, age, email, phone, dose_no, search_by, pincode, state_id=-1, dist_id=-1, dist_name='NA'):
    from cowin_get_email.databases.user_model import addUser
    # delegate to the model-layer addUser imported above (assumed wiring)
    return addUser(name, age, email, phone, dose_no, search_by, pincode, state_id, dist_id, dist_name)

Example #22

def get_tuple_task_dic():
    task_dic = {}
    if os.path.isdir(local_config()['impaladlog_path']):
        for root,dirs,files in os.walk(local_config()['impaladlog_path']):
            for file in files:
                if file.endswith('.hehe024.impaladlog'):
                    query_name = file.split('.')[0]
                    if query_name not in task_dic.keys():
                        task_dic[query_name] = [os.path.join(root,file),'tuple_descriptor_'+query_name+'.xml']                    
    elif os.path.isfile(local_config()['impaladlog_path']):
        file = os.path.split(local_config()['impaladlog_path'])[-1]
        query_name = file.split('.')[0]
        if query_name not in task_dic.keys():
            task_dic[query_name] = [local_config()['impaladlog_path'],'tuple_descriptor_'+query_name+'.xml']
    return task_dic

if __name__ == '__main__':
    if 'Linux' in local_config()['current_sys']:
        os.system('rm -f ./output_dir/* ')
    elif 'Windows' in local_config()['current_sys']:
        os.system(r'del .\output_dir\* /q')
    #Profile analysis
    if local_config()['sw_input']:
        task_dic = get_task_dic()
        for task in task_dic.keys():
            print(task_dic[task][0])
            #1.xml
            obj = class_log(task_dic[task][0],task_dic[task][1],logging)
            obj.getAttri()
            obj.writeToXML()
            #hbase dist
            obj.writeToHBASEdist()
    
Example #23
    def init_tbl(self):
        #create external table
        self.tbl_dic = {
            "et_store_sales":"""
                create external table et_store_sales
                (
                  ss_sold_date_sk           int,
                  ss_sold_time_sk           int,
                  ss_item_sk                int,
                  ss_customer_sk            int,
                  ss_cdemo_sk               int,
                  ss_hdemo_sk               int,
                  ss_addr_sk                int,
                  ss_store_sk               int,
                  ss_promo_sk               int,
                  ss_ticket_number          int,
                  ss_quantity               int,
                  ss_wholesale_cost         double,
                  ss_list_price             double,
                  ss_sales_price            double,
                  ss_ext_discount_amt       double,
                  ss_ext_sales_price        double,
                  ss_ext_wholesale_cost     double,
                  ss_ext_list_price         double,
                  ss_ext_tax                double,
                  ss_coupon_amt             double,
                  ss_net_paid               double,
                  ss_net_paid_inc_tax       double,
                  ss_net_profit             double
                )
                row format delimited fields terminated by '|'
                location '%s/store_sales'
                tblproperties ('serialization.null.format'='')
      ;"""%(local_config()['flatfile_path_HDFS']),
        "et_customer_demographics":"""
                create external table et_customer_demographics
                (
                  cd_demo_sk                int,
                  cd_gender                 string,
                  cd_marital_status         string,
                  cd_education_status       string,
                  cd_purchase_estimate      int,
                  cd_credit_rating          string,
                  cd_dep_count              int,
                  cd_dep_employed_count     int,
                  cd_dep_college_count      int
                )
                row format delimited fields terminated by '|'
                location '%s/customer_demographics'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS']),
        "et_date_dim":"""
                create external table et_date_dim
                (
                  d_date_sk                 int,
                  d_date_id                 string,
                  d_date                    string, -- YYYY-MM-DD format
                  d_month_seq               int,
                  d_week_seq                int,
                  d_quarter_seq             int,
                  d_year                    int,
                  d_dow                     int,
                  d_moy                     int,
                  d_dom                     int,
                  d_qoy                     int,
                  d_fy_year                 int,
                  d_fy_quarter_seq          int,
                  d_fy_week_seq             int,
                  d_day_name                string,
                  d_quarter_name            string,
                  d_holiday                 string,
                  d_weekend                 string,
                  d_following_holiday       string,
                  d_first_dom               int,
                  d_last_dom                int,
                  d_same_day_ly             int,
                  d_same_day_lq             int,
                  d_current_day             string,
                  d_current_week            string,
                  d_current_month           string,
                  d_current_quarter         string,
                  d_current_year            string
                )
                row format delimited fields terminated by '|'
                location '%s/date_dim'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS']),
        "et_time_dim":"""
                create external table et_time_dim
                (
                  t_time_sk                 int,
                  t_time_id                 string,
                  t_time                    int,
                  t_hour                    int,
                  t_minute                  int,
                  t_second                  int,
                  t_am_pm                   string,
                  t_shift                   string,
                  t_sub_shift               string,
                  t_meal_time               string
                )
                row format delimited fields terminated by '|'
                location '%s/time_dim'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS']),
        "et_item":"""
                create external table et_item
                (
                  i_item_sk                 int,
                  i_item_id                 string,
                  i_rec_start_date          string,
                  i_rec_end_date            string,
                  i_item_desc               string,
                  i_current_price           double,
                  i_wholesale_cost          double,
                  i_brand_id                int,
                  i_brand                   string,
                  i_class_id                int,
                  i_class                   string,
                  i_category_id             int,
                  i_category                string,
                  i_manufact_id             int,
                  i_manufact                string,
                  i_size                    string,
                  i_formulation             string,
                  i_color                   string,
                  i_units                   string,
                  i_container               string,
                  i_manager_id              int,
                  i_product_name            string
                )
                row format delimited fields terminated by '|'
                location '%s/item'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS']),
        "et_store":"""
                create external table et_store
                (
                  s_store_sk                int,
                  s_store_id                string,
                  s_rec_start_date          string,
                  s_rec_end_date            string,
                  s_closed_date_sk          int,
                  s_store_name              string,
                  s_number_employees        int,
                  s_floor_space             int,
                  s_hours                   string,
                  s_manager                 string,
                  s_market_id               int,
                  s_geography_class         string,
                  s_market_desc             string,
                  s_market_manager          string,
                  s_division_id             int,
                  s_division_name           string,
                  s_company_id              int,
                  s_company_name            string,
                  s_street_number           string,
                  s_street_name             string,
                  s_street_type             string,
                  s_suite_number            string,
                  s_city                    string,
                  s_county                  string,
                  s_state                   string,
                  s_zip                     string,
                  s_country                 string,
                  s_gmt_offset              double,
                  s_tax_precentage          double
                )
                row format delimited fields terminated by '|'
                location '%s/store'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS']),
        "et_customer":"""
                create external table et_customer
                (
                  c_customer_sk             int,
                  c_customer_id             string,
                  c_current_cdemo_sk        int,
                  c_current_hdemo_sk        int,
                  c_current_addr_sk         int,
                  c_first_shipto_date_sk    int,
                  c_first_sales_date_sk     int,
                  c_salutation              string,
                  c_first_name              string,
                  c_last_name               string,
                  c_preferred_cust_flag     string,
                  c_birth_day               int,
                  c_birth_month             int,
                  c_birth_year              int,
                  c_birth_country           string,
                  c_login                   string,
                  c_email_address           string,
                  c_last_review_date        string
                )
                row format delimited fields terminated by '|'
                location '%s/customer'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS']),
        "et_promotion":"""
                create external table et_promotion
                (
                  p_promo_sk                int,
                  p_promo_id                string,
                  p_start_date_sk           int,
                  p_end_date_sk             int,
                  p_item_sk                 int,
                  p_cost                    double,
                  p_response_target         int,
                  p_promo_name              string,
                  p_channel_dmail           string,
                  p_channel_email           string,
                  p_channel_catalog         string,
                  p_channel_tv              string,
                  p_channel_radio           string,
                  p_channel_press           string,
                  p_channel_event           string,
                  p_channel_demo            string,
                  p_channel_details         string,
                  p_purpose                 string,
                  p_discount_active         string
                )
                row format delimited fields terminated by '|'
                location '%s/promotion'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS']),
        "et_household_demographics":"""
                create external table et_household_demographics
                (
                  hd_demo_sk                int,
                  hd_income_band_sk         int,
                  hd_buy_potential          string,
                  hd_dep_count              int,
                  hd_vehicle_count          int
                )
                row format delimited fields terminated by '|'
                location '%s/household_demographics'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS']),
        "et_customer_address":"""
                create external table et_customer_address
                (
                  ca_address_sk             int,
                  ca_address_id             string,
                  ca_street_number          string,
                  ca_street_name            string,
                  ca_street_type            string,
                  ca_suite_number           string,
                  ca_city                   string,
                  ca_county                 string,
                  ca_state                  string,
                  ca_zip                    string,
                  ca_country                string,
                  ca_gmt_offset             int,
                  ca_location_type          string
                )
                row format delimited fields terminated by '|'
                location '%s/customer_address'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS']),
        "et_inventory":"""
                create external table et_inventory
                (
                  inv_date_sk               int,
                  inv_item_sk               int,
                  inv_warehouse_sk          int,
                  inv_quantity_on_hand      int
                )
                row format delimited fields terminated by '|'
                location '%s/inventory'
                tblproperties ('serialization.null.format'='')
    ;"""%(local_config()['flatfile_path_HDFS'])}

        self.partition_tbls = {
            'store_sales':"""
                drop table if exists store_sales;
                create table store_sales
                (
                  ss_sold_time_sk int,
                  ss_item_sk int,
                  ss_customer_sk int,
                  ss_cdemo_sk int,
                  ss_hdemo_sk int,
                  ss_addr_sk int,
                  ss_store_sk int,
                  ss_promo_sk int,
                  ss_ticket_number int,
                  ss_quantity int,
                  ss_wholesale_cost double,
                  ss_list_price double,
                  ss_sales_price double,
                  ss_ext_discount_amt double,
                  ss_ext_sales_price double,
                  ss_ext_wholesale_cost double,
                  ss_ext_list_price double,
                  ss_ext_tax double,
                  ss_coupon_amt double,
                  ss_net_paid double,
                  ss_net_paid_inc_tax double,
                  ss_net_profit double
                )
                partitioned by (ss_sold_date_sk int)
                stored as %s;
                set COMPRESSION_CODEC=%s;
            """%(self.tbl_format,
                    self.tbl_compress_codec)
            }
 def __init__(self, FILE_NAME):
     self.wb = Workbook()
     self.ew = ExcelWriter(workbook=self.wb)
     self.filename = os.path.join(local_config()['install_path'],'queries/logs/%s/'%(FILE_NAME.split('.')[0]), FILE_NAME)
     self.ws_num = 0
     self.ws = []
Example #25
 def create_db(self):
     #create database
     cmd = "impala-shell -i tracing024:21000 -q 'create database if not exists %s;'"%(local_config()['db_name'])
     os.system(cmd)
Example #26
 def create_external_table(self):
     for tbl in self.tbl_dic.keys():
         cmd = "impala-shell -i %s:21000  -d %s <<EOF %s  \nEOF"%(local_config()['impalad_nodes'][0],self.db_name,self.tbl_dic[tbl])
         #print(cmd)
         os.system(cmd)
from config import local_config
import os
dbname = local_config()['DATABASE_NAME']
db_type = local_config()['db_type']
cache_dic = {'q43.sql':[2450816,2451179 ],
'q27.sql':[2450815,2451179 ],
'q65.sql':[2451911,2452275 ],
'q52.sql':[2451149,2451179 ],
'q19.sql':[2451484,2451513 ],
'q98.sql':[2451911,2451941 ],
'q63.sql':[2451911,2452275 ],
'q89.sql':[2451545,2451910 ],
'q79.sql':[2451180,2451269 ],
'q68.sql':[2451180,2451269 ],
'q42.sql':[2451149,2451179 ],
'q73.sql':[2451180,2451269 ],
'q53.sql':[2451911,2452275 ],
'q55.sql':[2452245,2452275 ],
'q7.sql' :[2450815,2451179 ],
'q3.sql' :[2451149,2451179],
'q34.sql':[2450816,2451910 ]}

def to_cache(query_id):
    range_ = cache_dic[query_id]
    for range_i in range(range_[0],range_[1]):
        cmd = """
            impala-shell -i tracing024:21000 -d %s -q "alter table store_sales_%s partition(ss_sold_date_sk = %d) set cached in 'testpool'"
            """%(dbname,db_type,int(range_i))
        os.system(cmd)
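
A usage sketch, assuming the caller passes one of the keys of cache_dic above:

# Assumed invocation: cache every ss_sold_date_sk partition in the
# date-key range that q42.sql touches.
to_cache('q42.sql')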

def un_cache():
Example #28
    class ParslFilter(logging.Filter):
        """Filter out Parsl debug logs"""

        def filter(self, record):
            return not (record.levelno == logging.DEBUG and '/parsl/' in record.pathname)


    for h in handlers:
        h.addFilter(ParslFilter())

    logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        level=logging.INFO, handlers=handlers)

    # Write the configuration
    # ML nodes: N for updating models, 1 for MolDQN, 1 for inference runs
    config = local_config(os.path.join(out_dir, 'run-info'), args.num_workers)

    # Save Parsl configuration
    with open(os.path.join(out_dir, 'parsl_config.txt'), 'w') as fp:
        print(str(config), file=fp)

    # Connect to the redis server
    client_queues, server_queues = make_queue_pairs(args.redishost, args.redisport,
                                                    serialization_method="pickle",
                                                    topics=['simulate', 'infer', 'train'],
                                                    keep_inputs=False)

    # Apply wrappers to functions to affix static settings
    #  Update wrapper changes the __name__ field, which is used by the Method Server
    def _fix_arguments(func, **kwargs):
        my_func = partial(func, **kwargs)
import os
from class_xml.xml_writer import xml_writers,xml_node
from xml.dom.minidom import *

from log_analysis.getStructInfo import StructInfo
from log_analysis.getDetailInfo import DetailInfo
from log_analysis.getInstanceInfo import InstanceInfo
from config import local_config
from statistics_info.data_dist import server_idx_map
server_idx_map = local_config()['server_idx_map']   # the configured map overrides the imported default
import re

class class_log:

    def __init__(self, log_file,xml_filename,logging):
        self.logging = logging
        self.xml_filename = xml_filename
        with open(log_file,'r') as fp:
            self.logfile = fp.read()
            self.part_impalad = (0, self.logfile.find('+----------+'))
            self.part_queryResult = (self.part_impalad[1], self.logfile.find('Query Runtime Profile', self.part_impalad[1] + 1))
            self.part_queryRuntime = (self.part_queryResult[1], self.logfile.find('----------------', self.part_queryResult[1] + 1))
            self.part_planFragment = (self.part_queryRuntime[1], self.logfile.find('----------------', self.part_queryRuntime[1] + 1))
            self.part_eachnodecosts = (self.part_planFragment[1], self.logfile.find('    Coordinator Fragment', self.part_planFragment[1] + 1))
            self.part_plannode = (self.part_eachnodecosts[1], self.logfile.find('\n    Fragment F',self.part_eachnodecosts[1]+1))
            self.part_instances = (self.part_plannode[1],len(self.logfile))
        #1.plan.xml
        self.obj_xml = xml_writers(xml_file = os.path.join(local_config()['install_dir'],'output_dir',(self.xml_filename)),query_name = xml_filename[0:-4])
        self.dom = self.obj_xml.create_dom()
        self.obj_node = xml_node(self.dom)