def doMatrixOp(kwargs):
    """Time MADlib's SVD on the dense matrix table ``M<tableStub>``.

    One row (nodes, rows, cols, followed by whatever ``cxn.time`` returns
    in the ``time1``..``time5`` columns) is appended to
    ``../output/<outdir>/madlib_<mattype>_<opType><nodes>.txt``.  The CSV
    header is only written when the file does not exist yet.

    NOTE(review): this file contains a second ``doMatrixOp``; if both live
    in the same module the later definition replaces this one -- confirm.

    Args:
        kwargs: dict read with ``.get`` for the keys ``opType`` (only
            ``'SVD'`` is supported), ``mattype``, ``tableStub``, ``nodes``
            and ``outdir``.

    Returns:
        None.  Returns early (after printing ``Timed Out``) without
        writing anything when ``cxn.time`` returns ``None``.

    Raises:
        NotImplementedError: if ``opType`` is not ``'SVD'``.
    """
    opType = kwargs.get('opType')
    mattype = kwargs.get('mattype')
    tableStub = kwargs.get('tableStub')
    nodes = kwargs.get('nodes')
    outdir = kwargs.get('outdir')

    try:
        tableStub = int(tableStub)
    except ValueError:
        # Non-numeric stubs are used verbatim in the table name.
        pass
    Mname = 'M{}'.format(tableStub)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('Evaluating: {}'.format(opType))

    # Reject unsupported operations before opening a database connection.
    if opType != 'SVD':
        raise NotImplementedError('Invalid Operation')
    call = "svd('{}','svd','row_num',10, 10,'svd_summary')".format(Mname)
    cleanup = ['svd_s', 'svd_u', 'svd_v', 'svd_summary']

    colnames = [
        'nodes', 'rows', 'cols', 'time1', 'time2', 'time3', 'time4', 'time5'
    ]
    runTimes = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    cxn = SQLCxn(username='******', db='ubuntu', timeout=2000)
    shape = cxn.get_shape_dense(Mname)
    rows = shape[0]
    cols = shape[1]

    # Make sure output tables left over from a previous run are gone.
    for obj in cleanup:
        cxn.execute('DROP TABLE IF EXISTS {}'.format(obj))

    sql_call = 'SELECT madlib.{}'.format(call)
    path = '../output/{}/madlib_{}_{}{}.txt'.format(outdir, mattype,
                                                    opType, int(nodes))

    # .loc/.iloc replace the deprecated .ix indexer (label-based for the
    # named columns, positional for the timing columns).
    runTimes.loc[:, ['nodes', 'rows', 'cols']] = (nodes, rows, cols)

    madlib_timeout = ('../temp/madlib_punked_out.json', opType)
    res = cxn.time(sql_call, cleanup, madlib_timeout)
    if res is None:
        print('Timed Out')
        return

    runTimes.iloc[:, 3:] = res
    writeHeader = not os.path.exists(path)
    runTimes.to_csv(path, index=False, header=writeHeader, mode='a')
def main(kwargs):
    """Time a MADlib training call on the ``adclick_clean<stub>_dense`` table.

    Builds the helper table(s) the chosen operation needs (if they do not
    exist yet), drops output tables left over from earlier runs, times the
    MADlib call via ``cxn.time`` and writes a one-row CSV to
    ``../output/madlib_<opType><nodes>_dense.txt``.

    Args:
        kwargs: dict with the keys ``opType`` (``'logit'``, ``'reg'`` or
            ``'pca'``), ``nodes`` and ``stub``.

    Raises:
        KeyError: if one of the required keys is missing.
        NotImplementedError: if ``opType`` is not a supported operation.
    """
    op_type = kwargs['opType']
    nodes = kwargs['nodes']
    stub = kwargs['stub']

    # Fail fast: previously an unknown op surfaced as a NameError only
    # after the connection was opened and tables had been created.
    if op_type not in ('logit', 'reg', 'pca'):
        raise NotImplementedError('Invalid Operation')

    colnames = [
        'nodes', 'rows', 'cols', 'time1', 'time2', 'time3', 'time4', 'time5'
    ]
    runTimes = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    cxn = SQLCxn(username='******', db='ubuntu')
    shape = cxn.get_shape_dense('adclick_clean{}_dense'.format(stub))

    # need to do a bit of preprocessing: split the dense vector into the
    # response (first element) and the independent variables (the rest)
    if not cxn.table_exists('adclick_clean_vectors_split'):
        stmt = """
            CREATE TABLE adclick_clean_vectors_split AS (
                SELECT row_num, val[1]::INTEGER y,
                       val[2:{}]::NUMERIC[] indep_vars
                  FROM adclick_clean{}_dense
            ) DISTRIBUTED BY (row_num)
        """.format(shape[1], stub)
        cxn.execute(stmt)

    if op_type == 'logit':
        call = """
            SELECT madlib.logregr_train('adclick_clean_vectors_split',
                                        'adclick_logit', 'y', 'indep_vars',
                                        NULL, 3, 'igd', .000001)
        """
        cleanup = ['adclick_logit_summary', 'adclick_logit']
    elif op_type == 'reg':
        call = """
            SELECT madlib.linregr_train('adclick_clean_vectors_split',
                                        'adclick_reg', 'y', 'indep_vars')
        """
        cleanup = ['adclick_reg_summary', 'adclick_reg']
    else:  # 'pca'
        if not cxn.table_exists('adclick_clean_depvars'):
            stmt = """
                CREATE TABLE adclick_clean_depvars AS (
                    SELECT row_num, val[2:{}]::NUMERIC[] val
                      FROM adclick_clean{}_dense
                ) DISTRIBUTED BY (row_num)
            """.format(shape[1], stub)
            cxn.execute(stmt)
        call = """
            SELECT madlib.pca_train('adclick_clean_depvars', 'result_table',
                                    'row_num', 5);
            SELECT madlib.pca_project('adclick_clean_depvars', 'result_table',
                                      'adclick_prj', 'row_num',
                                      'residual_table', 'result_summary_table')
        """
        cleanup = [
            'result_table', 'result_table_mean', 'residual_table',
            'result_summary_table', 'adclick_prj'
        ]

    # Drop exactly the tables the call will (re)create.  Deriving the drops
    # from ``cleanup`` fixes the old 'adlick_prj' typo, which left a stale
    # 'adclick_prj' table in place before pca_project ran.
    for table in cleanup:
        cxn.execute('DROP TABLE IF EXISTS {}'.format(table))

    # .loc/.iloc replace the deprecated .ix indexer.
    runTimes.loc[:, ['rows', 'cols']] = shape
    path = '../output/madlib_{}{}_dense.txt'.format(op_type, int(nodes))
    runTimes.loc[:, 'nodes'] = nodes

    res = cxn.time(call, cleanup)
    runTimes.iloc[:, 3:] = res
    runTimes.to_csv(path, index=False)
def doMatrixOp(kwargs):
    """Time one MADlib dense-matrix operation on table ``M<tableStub>``.

    Supported ``opType`` values and the MADlib call each one issues:

    * ``'TRANS'`` -- ``matrix_trans`` of M into ``Mt``
    * ``'NORM'``  -- ``matrix_norm`` of M with the ``'fro'`` norm type
    * ``'GMM'``   -- ``matrix_mult`` of M with the table whose name is M's
      with ``'wide'`` replaced by ``'tall'``, into ``MN``
    * ``'MVM'``   -- ``matrix_vec_mult`` of M with a random vector whose
      length is M's column count
    * ``'TSM'``   -- ``matrix_mult`` of M (``trans=True``) with M, into ``MtM``
    * ``'ADD'``   -- ``matrix_add`` of M and ``N<tableStub>`` into ``M_N``

    One row (nodes, rows, cols, followed by whatever ``cxn.time`` returns
    in the ``time1``..``time5`` columns) is appended to
    ``../output/<outdir>/madlib_<mattype>_<opType><nodes>.txt``.  The CSV
    header is only written when the file does not exist yet.

    NOTE(review): this file contains another ``doMatrixOp`` (SVD); if both
    live in the same module this later definition replaces it -- confirm.

    Args:
        kwargs: dict read with ``.get`` for the keys ``opType``,
            ``mattype``, ``tableStub``, ``nodes`` and ``outdir``.

    Returns:
        None.  Returns early (after printing ``Timed Out``) without
        writing anything when ``cxn.time`` returns ``None``.

    Raises:
        NotImplementedError: if ``opType`` is not one of the operations
            listed above.
    """
    opType = kwargs.get('opType')
    mattype = kwargs.get('mattype')
    tableStub = kwargs.get('tableStub')
    nodes = kwargs.get('nodes')
    outdir = kwargs.get('outdir')

    try:
        tableStub = int(tableStub)
    except ValueError:
        # Non-numeric stubs are used verbatim in the table names.
        pass
    Mname = 'M{}'.format(tableStub)
    Nname = 'N{}'.format(tableStub)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('Evaluating: {}'.format(opType))

    # Reject unsupported operations before opening a database connection.
    if opType not in ('TRANS', 'NORM', 'GMM', 'MVM', 'TSM', 'ADD'):
        raise NotImplementedError('Invalid Operation')

    colnames = [
        'nodes', 'rows', 'cols', 'time1', 'time2', 'time3', 'time4', 'time5'
    ]
    runTimes = pd.DataFrame(np.zeros((1, len(colnames))), columns=colnames)

    cxn = SQLCxn(username='******', db='ubuntu', timeout=10000)
    shape = cxn.get_shape_dense(Mname)
    rows = shape[0]
    cols = shape[1]

    # ``cleanup`` lists the output tables the call creates, if any.
    cleanup = []
    if opType == 'TRANS':
        call = "matrix_trans('{}',NULL,'Mt',NULL)".format(Mname)
        cleanup.append('Mt')
    elif opType == 'NORM':
        call = "matrix_norm('{}',NULL,'fro')".format(Mname)
    elif opType == 'GMM':
        Nname = Mname.replace('wide', 'tall')
        call = "matrix_mult('{}',NULL,'{}',NULL,'MN',NULL)".format(
            Mname, Nname)
        cleanup.append('MN')
    elif opType == 'MVM':
        # The vector is generated server-side, one random value per column.
        array_call = (
            'SELECT array_agg(random()) FROM generate_series(1,{})'.format(
                cols))
        call = "matrix_vec_mult('{}',NULL,({}))".format(Mname, array_call)
    elif opType == 'TSM':
        call = "matrix_mult('{0}','trans=True','{0}',NULL,'MtM',NULL)".format(
            Mname)
        cleanup.append('MtM')
    elif opType == 'ADD':
        call = "matrix_add('{}',NULL,'{}',NULL,'M_N',NULL)".format(
            Mname, Nname)
        cleanup.append('M_N')
    else:
        # Defensive: keeps this chain in sync with the up-front check.
        raise NotImplementedError('Invalid Operation')

    # Make sure output tables left over from a previous run are gone.
    for obj in cleanup:
        cxn.execute('DROP TABLE IF EXISTS {}'.format(obj))

    sql_call = 'SELECT madlib.{}'.format(call)
    path = '../output/{}/madlib_{}_{}{}.txt'.format(outdir, mattype,
                                                    opType, int(nodes))

    # .loc/.iloc replace the deprecated .ix indexer (label-based for the
    # named columns, positional for the timing columns).
    runTimes.loc[:, ['nodes', 'rows', 'cols']] = (nodes, rows, cols)

    madlib_timeout = ('../temp/madlib_punked_out.json', opType)
    res = cxn.time(sql_call, cleanup, madlib_timeout)
    if res is None:
        print('Timed Out')
        return

    runTimes.iloc[:, 3:] = res
    writeHeader = not os.path.exists(path)
    runTimes.to_csv(path, index=False, header=writeHeader, mode='a')