Exemplo n.º 1
0
def setup_random_seed(seed=None):
    """Seed the global ``random`` generator and report which seed was used.

    The seed is picked in this order of precedence:

    1. ``h2o_args.random_seed`` when it is not None,
    2. the ``seed`` argument when it is not None,
    3. a freshly drawn random integer.

    h2o_args.unit_main(), h2o.init() or this function may be the first to
    call ``h2o_args.parse_our_args()``. Calling it here makes sure any --seed
    setting is in place before ``h2o_args.random_seed`` is looked at
    (h2o.setup_random_seed() is done before h2o.init() in tests).
    parse_our_args() will be a no-op if it was already called once.

    Args:
        seed: fallback seed, used only when ``h2o_args.random_seed`` is None.

    Returns:
        The seed value that was passed to ``random.seed``.
    """
    h2o_args.parse_our_args()
    if h2o_args.random_seed is not None:
        SEED = h2o_args.random_seed
    elif seed is not None:
        SEED = seed
    else:
        # sys.maxint exists only on Python 2; use sys.maxsize where it is gone
        SEED = random.randint(0, getattr(sys, "maxint", sys.maxsize))
    random.seed(SEED)
    # One pre-formatted argument prints the same text whether ``print`` is a
    # statement (Python 2) or a function (Python 3).
    print("\nUsing random seed: %s" % (SEED,))
    return SEED
Exemplo n.º 2
0
def setup_random_seed(seed=None):
    """Seed the global ``random`` generator and report which seed was used.

    The seed is picked in this order of precedence:

    1. ``h2o_args.random_seed`` when it is not None,
    2. the ``seed`` argument when it is not None,
    3. a freshly drawn random integer.

    h2o_args.unit_main(), h2o.init() or this function may be the first to
    call ``h2o_args.parse_our_args()``. Calling it here makes sure any --seed
    setting is in place before ``h2o_args.random_seed`` is looked at
    (h2o.setup_random_seed() is done before h2o.init() in tests).
    parse_our_args() will be a no-op if it was already called once.

    Args:
        seed: fallback seed, used only when ``h2o_args.random_seed`` is None.

    Returns:
        The seed value that was passed to ``random.seed``.
    """
    h2o_args.parse_our_args()
    if h2o_args.random_seed is not None:
        SEED = h2o_args.random_seed
    elif seed is not None:
        SEED = seed
    else:
        # sys.maxint exists only on Python 2; use sys.maxsize where it is gone
        SEED = random.randint(0, getattr(sys, "maxint", sys.maxsize))
    random.seed(SEED)
    # One pre-formatted argument prints the same text whether ``print`` is a
    # statement (Python 2) or a function (Python 3).
    print("\nUsing random seed: %s" % (SEED,))
    return SEED
Exemplo n.º 3
0
Arquivo: h2o.py Projeto: JMR-b/h2o-dev
def init(*args, **kwargs):
    """Call ``h2o_bc.init`` and publish its result as ``nodes`` and ``n0``.

    All positional and keyword arguments are forwarded unchanged to
    ``h2o_bc.init``. Its return value is stored in the module-level ``nodes``
    and the first element of that value in the module-level ``n0``.

    Before that call, a warning is printed for each of ``nodes``, ``n0`` and
    ``h2o_nodes.nodes`` that is neither None nor empty.

    Returns:
        None. The results are exposed through the ``nodes`` / ``n0`` globals.
    """
    # this will be a no-op if already called once
    h2o_args.parse_our_args()

    global nodes, n0

    # ugly, but we have 3 places that are kept in sync..check them all
    def checkIsNone(thingName, thing):
        if thing is None:
            return
        try:
            isEmpty = len(thing) == 0
        except TypeError:
            # No len() on this object (n0 holds a single element of nodes,
            # which may not be sized): treat it as a leftover and warn
            # instead of crashing.
            isEmpty = False
        if not isEmpty:
            print("WARNING: %s is not empty before h2o.init()..Unlikely that makes sense? %s" %
                  (thingName, thing))

    checkIsNone("nodes", nodes)
    checkIsNone("n0", n0)
    checkIsNone("h2o_nodes.nodes", h2o_nodes.nodes)
    nodes = h2o_bc.init(*args, **kwargs)
    n0 = nodes[0]  # use to save typing?..i.e. h2o.n0
Exemplo n.º 4
0
def init(*args, **kwargs):
    """Call ``h2o_bc.init`` and publish its result as ``nodes`` and ``n0``.

    All positional and keyword arguments are forwarded unchanged to
    ``h2o_bc.init``. Its return value is stored in the module-level ``nodes``
    and the first element of that value in the module-level ``n0``.

    Before that call, a warning is printed for each of ``nodes``, ``n0`` and
    ``h2o_nodes.nodes`` that is neither None nor empty.

    Returns:
        None. The results are exposed through the ``nodes`` / ``n0`` globals.
    """
    # this will be a no-op if already called once
    h2o_args.parse_our_args()

    global nodes, n0

    # ugly, but we have 3 places that are kept in sync..check them all
    def checkIsNone(thingName, thing):
        if thing is None:
            return
        try:
            isEmpty = len(thing) == 0
        except TypeError:
            # No len() on this object (n0 holds a single element of nodes,
            # which may not be sized): treat it as a leftover and warn
            # instead of crashing.
            isEmpty = False
        if not isEmpty:
            print("WARNING: %s is not empty before h2o.init()..Unlikely that makes sense? %s" %
                  (thingName, thing))

    checkIsNone("nodes", nodes)
    checkIsNone("n0", n0)
    checkIsNone("h2o_nodes.nodes", h2o_nodes.nodes)
    nodes = h2o_bc.init(*args, **kwargs)
    n0 = nodes[0]  # use to save typing?..i.e. h2o.n0
Exemplo n.º 5
0
            row.update(kwargs)
            row.update(glm)
            row.update(val)
            row.update({'wall_clock_secs': wall_clock_secs})
            row.update({'java_heap_GB': java_heap_GB})
            csvWrt.writerow(row)
        h2o.nodes[0].remove_key(k)
    finally:
        output.close()
    
def parse_file(f):
    """Import files on the first node, parse the first success, return its key.

    NOTE(review): the ``f`` argument is never read here; the file that gets
    parsed is whatever ``import_files()`` lists first under 'succeeded'.
    Confirm that this is intended.

    Returns:
        The 'destination_key' entry of the parse response.
    """
    node = h2o.nodes[0]
    imported = node.import_files()
    first_ok = imported['succeeded'][0]
    # long timeout (3600 seconds) is passed through to the parse call
    parsed = node.parse(first_ok['key'], timeoutSecs=3600)
    return parsed['destination_key']

# Script entry point. Only the beginning of this block is visible here.
if __name__ == '__main__':
    # Parse the harness arguments first.
    h2o_args.parse_our_args()
    files = None
    # Pick the input file list and the h2o.init() arguments based on is_ec2().
    if is_ec2():
        files = ec2_files
        h2o.init()
    else:
        # When is_ec2() is false, the local file list is used and
        # use_hdfs=True is passed to h2o.init().
        files = local_files
        h2o.init(use_hdfs=True)

    # want to ignore columns with missing values, since GLM throws away those rows, (won't analyze as many rows)
    # Distance, CRSEElapsedTime has some...I guess ignore
    # column Year 0 type: int
    # column Month 1 type: int
    # column DayofMonth 2 type: int
    # column DayOfWeek 3 type: int
    # column DepTime 4 type: int num_missing_values: 2302136
Exemplo n.º 6
0
            row.update(val)
            row.update({'wall_clock_secs': wall_clock_secs})
            row.update({'java_heap_GB': java_heap_GB})
            csvWrt.writerow(row)
        h2o.nodes[0].remove_key(k)
    finally:
        output.close()


def parse_file(f):
    """Import files on the first node, parse the first success, return its key.

    NOTE(review): the ``f`` argument is never read here; the file that gets
    parsed is whatever ``import_files()`` lists first under 'succeeded'.
    Confirm that this is intended.

    Returns:
        The 'destination_key' entry of the parse response.
    """
    node = h2o.nodes[0]
    imported = node.import_files()
    first_ok = imported['succeeded'][0]
    # long timeout (3600 seconds) is passed through to the parse call
    parsed = node.parse(first_ok['key'], timeoutSecs=3600)
    return parsed['destination_key']


# Script entry point. Only the beginning of this block is visible here.
if __name__ == '__main__':
    # Parse the harness arguments first.
    h2o_args.parse_our_args()
    files = None
    # Pick the input file list and the h2o.init() arguments based on is_ec2().
    if is_ec2():
        files = ec2_files
        h2o.init()
    else:
        # When is_ec2() is false, the local file list is used and
        # use_hdfs=True is passed to h2o.init().
        files = local_files
        h2o.init(use_hdfs=True)

    # want to ignore columns with missing values, since GLM throws away those rows, (won't analyze as many rows)
    # Distance, CRSEElapsedTime has some...I guess ignore
    # column Year 0 type: int
    # column Month 1 type: int
    # column DayofMonth 2 type: int
    # column DayOfWeek 3 type: int
    # column DepTime 4 type: int num_missing_values: 2302136