Exemplo n.º 1
0
        # Parameter dict for the HDFS-backed (Spark) runs.
        # NOTE(review): cmd_params_disk is referenced below but is built
        # above this fragment -- presumably the disk-path twin of this dict.
        cmd_params_hdfs = {'mattype' : mattype_m,
                   'Mpath'   : Mpath_hdfs,
                   'wPath'   : wPath_hdfs,
                   'Npath'   : Npath_hdfs,
                   'nodes'   : nodes,
                   'savestub': gb,
                   'tableStub' : '{}_{}'.format(gb, mattype_m)}

        # Stamp the current operator into both parameter sets, then render
        # the final CLI argument strings from the shared template.
        cmd_params_disk['opType'] = op
        cmd_params_hdfs['opType'] = op
        args_disk = cmd_args.format(**cmd_params_disk)
        args_hdfs = cmd_args.format(**cmd_params_hdfs)

        # Dispatch to every system selected in `systems`: Spark-based
        # systems (SystemML, MLlib) read from HDFS; the rest read from disk.
        if 'SYSTEMML' in systems:
          utils.run_spark(program = 'SystemMLMatrixOps',
                          sbt_dir = './systemml',
                          cmd_args = args_hdfs)
        if 'MLLIB' in systems:
          utils.run_spark(program = 'SparkMatrixOps',
                          sbt_dir = './mllib',
                          cmd_args = args_hdfs)
        if 'MADLIB' in systems:
          utils.run_python(program = 'madlib_matrix_ops.py',
                           cmd_args = args_disk)
        if 'R' in systems:
          utils.run_pbdR(program = 'R_matrix_ops.R',
                         cmd_args = args_disk)

        if 'SCIDB' in systems:
          utils.run_python(program = 'scidb_matrix_ops.py',
                           cmd_args = args_disk)
Exemplo n.º 2
0
# Fail fast when the benchmark root is not configured -- every path below
# is resolved relative to it.  (project_root is read above this fragment.)
if project_root is None:
    # Fixed typo in the user-facing message: "Pease" -> "Please".
    msg = 'Please set environment variable "BENCHMARK_PROJECT_ROOT"'
    raise StandardError(msg)

# Symlink name -> project-relative path it should point at.
externals = {
    'lib': '/lib',
    'disk_data': '/tests/SimpleMatrixOps (Disk Data)/output'
}
for name in externals:
    os.symlink(project_root + externals[name], '../external/' + name)

# Shared helpers live behind the symlink created above.
sys.path.append('../external/lib/python')
import make_utils as utils
import global_params as params

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./spark', makelog=makelog)

utils.run_python(program='get_data.py')
# SAVE_STUB tags output tables for this day's run; read by the jobs below.
os.putenv('SAVE_STUB', '_1')
utils.run_spark(program='SparkPreclean',
                sbt_dir='./spark',
                cmd_args='/scratch/day_1.gz true')

# stop logging
end_make_logging()
Exemplo n.º 3
0
        # Parameter dicts for the disk-backed and HDFS-backed runs; they
        # differ only in which set of paths they reference.
        cmd_params_disk = {'mattype' : mattype_m,
                   'Mpath'   : Mpath_disk,
                   'wPath'   : wPath_disk,
                   'Npath'   : Npath_disk,
                   'nodes'   : nodes,
                   'savestub': gb,
                   'tableStub' : '{}_{}'.format(gb, mattype_m)}
        cmd_params_hdfs = {'mattype' : mattype_m,
                   'Mpath'   : Mpath_hdfs,
                   'wPath'   : wPath_hdfs,
                   'Npath'   : Npath_hdfs,
                   'nodes'   : nodes,
                   'savestub': gb,
                   'tableStub' : '{}_{}'.format(gb, mattype_m)}

        # Stamp the current operator into both parameter sets, then render
        # the final CLI argument strings from the shared template.
        cmd_params_disk['opType'] = op
        cmd_params_hdfs['opType'] = op
        args_disk = cmd_args.format(**cmd_params_disk)
        args_hdfs = cmd_args.format(**cmd_params_hdfs)

        # Dispatch to each selected system: Spark reads from HDFS, MADlib
        # and pbdR read from local disk.
        if 'MLLIB' in systems:
          utils.run_spark(program = 'SparkDecompositions',
                 sbt_dir = './mllib',
                 cmd_args = args_hdfs)
        if 'MADLIB' in systems:
          utils.run_python(program = 'madlib_matrix_ops.py',
                  cmd_args = args_disk)
        if 'R' in systems:
          utils.run_pbdR(program = 'R_matrix_ops.R',
                cmd_args = args_disk)
Exemplo n.º 4
0
 # Render CLI args for the disk- and HDFS-based systems.  The disk variant
 # hard-codes featureNames='ignored' -- presumably unused by the local
 # tools; confirm against tf_algs.py / madlib_algs.py.
 args_disk = cmd_arg_params.format(opType=op,
                                   inputPath=path_disk,
                                   nodes=nodes,
                                   featureNames='ignored',
                                   stub=stub)
 args_hdfs = cmd_arg_params.format(opType=op,
                                   inputPath=path_hdfs,
                                   nodes=nodes,
                                   featureNames=feature_names,
                                   stub=stub)
 # Clear SystemML's local scratch space left over from any previous run.
 os.system('rm -rf /tmp/systemml')
 if 'TF' in systems:
     utils.run_python(program='tf_algs.py', cmd_args=args_disk)
 if 'MLLIB' in systems:
     utils.run_spark(program='SparkMLAlgorithms',
                     sbt_dir='./mllib',
                     cmd_args=args_hdfs)
 if 'SYSTEMML' in systems:
     utils.run_spark(program='SystemMLMLAlgorithms',
                     sbt_dir='./systemml',
                     driver_memory='64G',
                     cmd_args=args_hdfs)
     # Second SystemML run: less driver memory, forcing Spark execution
     # via the extra execSpark=true flag.
     utils.run_spark(program='SystemMLMLAlgorithms',
                     sbt_dir='./systemml',
                     driver_memory='32G',
                     cmd_args=args_hdfs + ' execSpark=true')
 if 'MADLIB' in systems:
     if typ == 'dense':
         utils.run_python(program='madlib_algs.py', cmd_args=args_disk)
     else:
         # NOTE(review): fragment is truncated here -- the call below is
         # cut off mid-arguments in this chunk of the file.
         utils.run_python(program='madlib_sparse_algs.py',
Exemplo n.º 5
0
# create symlinks to external resources
project_root = os.getenv('BENCHMARK_PROJECT_ROOT')
if project_root is None:
    # Fail fast: every path below is resolved relative to the project root.
    # Fixed typo in the user-facing message: "Pease" -> "Please".
    msg = 'Please set environment variable "BENCHMARK_PROJECT_ROOT"'
    raise StandardError(msg)

# Symlink name -> project-relative path it should point at.
externals = {
    'lib': '/lib',
    'disk_data': '/tests/SimpleMatrixOps (Disk Data)/output'
}
for name in externals:
    os.symlink(project_root + externals[name], '../external/' + name)

# Shared helpers live behind the symlink created above.
sys.path.append('../external/lib/python')
import make_utils as utils
import global_params as params

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./spark', makelog=makelog)

# Preclean the "_1" data slice, then post-process and load its tables.
utils.run_spark(program='SparkPreclean', sbt_dir='./spark', cmd_args='_1 true')
utils.run_python(program='postprocess.py', cmd_args='--sparse False --stub _1')
utils.run_python(program='build_tables.py', cmd_args='_1')

# stop logging
end_make_logging()
Exemplo n.º 6
0
args_madlib = ('mattype=adclick '
               'xTableName=adclick_clean_indepvars_long '
               'yTableName=adclick_clean_y '
               'nodes={nodes} opType={op}')
args_hdfs = ('mattype=adclick '
             'Xpath=/scratch/pass.csv '
             'Ypath=/scratch/pass.csv '
             'passPath=/scratch/pass.csv '
             'dataPath=/scratch/adclick_clean{stub}_sparse.parquet '
             'nodes={nodes} opType={op}')

for alg in algorithms:
    argv = {'stub': stub, 'nodes': nodes, 'op': alg}

    cmd_args_hdfs = args_hdfs.format(**argv)

    if 'SYSTEMML' in systems:
        utils.run_spark(program='SystemMLMLAlgorithms',
                        sbt_dir='./systemml',
                        driver_memory='80G',
                        cmd_args=cmd_args_hdfs)
    if 'MLLIB' in systems:
        utils.run_spark(program='SparkMLAlgorithms',
                        sbt_dir='./mllib',
                        driver_memory='20G',
                        cmd_args=cmd_args_hdfs)
    if 'MADLIB' in systems:
        print 'MADLib Tests Not Implemented for Sparse Criteo'
    #    utils.run_python(program='madlib_bigmat_algs.py',
    #                     cmd_args=cmd_args_madlib)
Exemplo n.º 7
0
    msg = 'Pease set environment variable "BENCHMARK_PROJECT_ROOT"'
    raise StandardError(msg)

externals = {'lib': '/lib'}
for name in externals:
    os.symlink(project_root + externals[name], '../external/' + name)

sys.path.append('../external/lib/python')
import make_utils as utils
import global_params as params
import gen_data as data

# start logging
start_make_logging()

# compile
makelog = '../../output/make.log'
utils.run_sbt('./systemml', makelog=makelog)
utils.run_sbt('./mllib', makelog=makelog)

#utils.run_pbdR(program='R_pipelines.R')
utils.run_spark(program='SparkPipelines', sbt_dir='./mllib', cmd_args='')
#utils.run_spark(program='SystemMLPipelines',
#               sbt_dir='./systemml',
#               cmd_args='')

remove_dir('scratch_space')

# stop logging
end_make_logging()
Exemplo n.º 8
0
# Single-node matrix-op benchmarks over tall matrices.
mattype = 'tall'
if test_type == 'matrix-ops':
    for op in op_types:
        # Render the per-operator CLI string, then append the sweep
        # parameters shared by every system.
        args = (op, mattype, nrows)
        cmd_args = cmd_args_template.format(*args)
        cmd_args += ' fixedAxis=100 step=10'

        # Dispatch to each selected system with identical arguments.
        if 'NUMPY' in systems:
            utils.run_python(program='np_matrix_ops.py', cmd_args=cmd_args)
        if 'TF' in systems:
            utils.run_python(program='tf_matrix_ops.py', cmd_args=cmd_args)
        if 'R' in systems:
            utils.run_R(program='R_matrix_ops.R', cmd_args=cmd_args)
        if 'SYSTEMML' in systems:
            utils.run_spark(program='SystemMLMatrixOps',
                            sbt_dir='./systemml',
                            cmd_args=cmd_args)
        if 'MLLIB' in systems:
            utils.run_spark(program='MLLibMatrixOps',
                            sbt_dir='./mllib',
                            cmd_args=cmd_args)
        if 'MADLIB' in systems:
            utils.run_python(program='madlib_matrix_ops.py', cmd_args=cmd_args)
        if 'SCIDB' in systems:
            utils.run_python(program='scidb_matrix_ops.py', cmd_args=cmd_args)

# CPU-scaling sweep over a varying processor count.
if test_type == 'cpu':
    #    ops = ['TSM','ADD']
    ops = ['TSM']
    nproc = [1, 2, 4, 8, 16]
    # NOTE(review): fragment is truncated here -- the loop body is cut off.
    for num_proc in nproc:
Exemplo n.º 9
0
    for num_proc in nproc:
        # This will force the process to execute only on a subset of processors
        utils.set_nproc(num_proc)
        args = (op, mattype, nrows)
        cmd_args = cmd_args_template.format(*args)
        cmd_args += ' fixedAxis=100 step=10 nproc={}'.format(num_proc)

        # Dispatch to each selected system under the restricted CPU set.
        if 'NUMPY' in systems:
            utils.run_python(program='np_algs.py', cmd_args=cmd_args)
        if ('TF' in systems) and (op != 'logit'):
            utils.run_python(program='tf_algorithms.py', cmd_args=cmd_args)
        if 'R' in systems:
            utils.run_R(program='ml_algs.R', cmd_args=cmd_args)
        if 'SYSTEMML' in systems:
            utils.run_spark(program='SystemMLMLAlgorithms',
                            sbt_dir='./systemml',
                            driver_cores=str(num_proc),
                            cmd_args=cmd_args)
        if 'MLLIB' in systems:
            utils.run_spark(program='SparkMLAlgorithms',
                            sbt_dir='./mllib',
                            driver_cores=str(num_proc),
                            cmd_args=cmd_args)
        # Lift the CPU restriction before the database-backed systems run.
        # NOTE(review): this means MADlib/SciDB below are NOT restricted to
        # num_proc processors -- confirm that is intended.
        utils.set_nproc(999)
        if 'MADLIB' in systems:
            utils.run_python(program='madlib_algs.py', cmd_args=cmd_args)
        if 'SCIDB' in systems:
            utils.run_python(program='scidb_algs.py', cmd_args=cmd_args)

remove_dir('scratch_space')

# stop logging