示例#1
0
def get_frame(frame_id):
    """
    Look up a frame by id through the REST layer and wrap it as an H2OFrame.

    :param frame_id: Id of the frame to fetch; it is URL-quoted and appended to the "Frames/" endpoint.
    :return: An H2OFrame built from the first entry of the "frames" list in the JSON response.
    :raises ValueError: if frame_id is None.
    """
    if frame_id is None:
        raise ValueError("frame_id must not be None")

    endpoint = "Frames/" + urllib.quote(frame_id)
    payload = H2OConnection.get_json(endpoint)
    first = payload["frames"][0]  # only the first returned frame is used

    # Pair every column label with the vec id found at the same position.
    labels = [column["label"] for column in first["columns"]]
    vec_ids = first["vec_ids"]
    return H2OFrame(vecs=H2OVec.new_vecs(zip(labels, vec_ids), first["rows"]))
示例#2
0
文件: h2o.py 项目: yuecong/h2o-3
def get_frame(frame_id):
  """
  Retrieve the frame registered under `frame_id` and return it as an H2OFrame.

  :param frame_id: Id of the frame; URL-quoted before being used in the "Frames/<id>" request.
  :return: An H2OFrame whose vecs are created from the column labels and vec ids of the first returned frame.
  :raises ValueError: if frame_id is None.
  """
  if frame_id is None:
    raise ValueError("frame_id must not be None")
  # The response carries a list under "frames"; only its first element is read.
  frame_json = H2OConnection.get_json("Frames/" + urllib.quote(frame_id))["frames"][0]
  names_and_keys = zip([c["label"] for c in frame_json["columns"]], frame_json["vec_ids"])
  row_count = frame_json["rows"]
  return H2OFrame(vecs=H2OVec.new_vecs(names_and_keys, row_count))
示例#3
0
def ifelse(test, yes, no):
    """
    Semantically equivalent to R's ifelse.

    Builds a Rapids `ifelse` expression from the three operands, runs it into a
    temporary key, and wraps the resulting vecs in a new H2OFrame.

    :param test: A bool (sent as %TRUE / %FALSE), an H2OVec, or any object exposing key()
    :param yes:  An int or float (sent as a "#" literal), None (sent as #NaN), an H2OVec,
                 or any object exposing key()
    :param no:   Same accepted values as `yes`
    :return: An H2OFrame
    """
    def _keyed(obj):
        # Resolve obj to a key and the single-quoted form used inside the expression.
        key = obj._expr.eager() if isinstance(obj, H2OVec) else obj.key()
        return key, "'" + key + "'"

    def _branch(value):
        # Numeric and None operands become literals and leave no key to clean up.
        if isinstance(value, (int, float)):
            return None, "#{}".format(str(value))
        if value is None:
            return None, "#NaN"
        return _keyed(value)

    if isinstance(test, bool):
        cond_key, cond_arg = None, ("%TRUE" if test else "%FALSE")
    else:
        cond_key, cond_arg = _keyed(test)
    yes_key, yes_arg = _branch(yes)
    no_key, no_arg = _branch(no)

    result_key = H2OFrame.py_tmp_key()
    rapids("(= !{} (ifelse {} {} {}))".format(result_key, cond_arg, yes_arg,
                                              no_arg))

    described = frame(result_key)['frames'][0]  # first (only) frame of the JSON
    row_count = described['rows']
    vec_ids = described['vec_ids']
    labels = [column['label'] for column in described['columns']]
    vecs = H2OVec.new_vecs(zip(labels, vec_ids), row_count)

    # Drop the temporary result key first, then every operand key that was resolved above.
    removeFrameShallow(result_key)
    for key in (yes_key, no_key, cond_key):
        if key is not None:
            removeFrameShallow(str(key))
    return H2OFrame(vecs=vecs)
示例#4
0
文件: h2o.py 项目: jethrotan/h2o-3
def ifelse(test,yes,no):
  """
  Semantically equivalent to R's ifelse.

  The three operands are turned into arguments of a Rapids `ifelse` call; the
  result is read back from a temporary key and returned as a new H2OFrame.

  :param test: bool (becomes %TRUE / %FALSE), H2OVec, or an object providing key()
  :param yes:  int/float (becomes a "#" literal), None (becomes #NaN), H2OVec, or an object providing key()
  :param no:   same accepted values as `yes`
  :return: An H2OFrame
  """
  def materialize(obj):
    # H2OVecs are resolved through their expression's eager(); anything else through key().
    if isinstance(obj, H2OVec):
      return obj._expr.eager()
    return obj.key()

  # Keys resolved for the operands; they are handed to removeFrameShallow at the end.
  cond_key = yes_key = no_key = None

  if isinstance(test, bool):
    cond_arg = "%TRUE" if test else "%FALSE"
  else:
    cond_key = materialize(test)
    cond_arg = "'"+cond_key+"'"

  if isinstance(yes, (int,float)):
    yes_arg = "#{}".format(str(yes))
  elif yes is None:
    yes_arg = "#NaN"
  else:
    yes_key = materialize(yes)
    yes_arg = "'"+yes_key+"'"

  if isinstance(no, (int,float)):
    no_arg = "#{}".format(str(no))
  elif no is None:
    no_arg = "#NaN"
  else:
    no_key = materialize(no)
    no_arg = "'"+no_key+"'"

  out_key = H2OFrame.py_tmp_key()
  rapids("(= !{} (ifelse {} {} {}))".format(out_key,cond_arg,yes_arg,no_arg))

  meta = frame(out_key)['frames'][0]  # the first (only) frame in the JSON
  nrows = meta['rows']
  vec_ids = meta['vec_ids']
  labels = [c['label'] for c in meta['columns']]
  result_vecs = H2OVec.new_vecs(zip(labels, vec_ids), nrows)

  removeFrameShallow(out_key)
  for leftover in (yes_key, no_key, cond_key):
    if leftover is not None:
      removeFrameShallow(str(leftover))
  return H2OFrame(vecs=result_vecs)
示例#5
0
def _simple_un_math_op(op, data):
    """
    Element-wise math operations on H2OFrame and H2OVec.

    For an H2OFrame the operation is applied recursively to each of its vecs and
    the results are collected into a new H2OFrame. For an H2OVec a new H2OVec is
    returned that keeps the original name and wraps an Expr node for `op`.

    :param op: the math operation; it is concatenated into the error message, so a string is expected
    :param data: the H2OFrame or H2OVec object to operate on.
    :return: H2OFrame or H2OVec, with lazy operation
    :raises ValueError: if data is neither an H2OFrame nor an H2OVec
    """
    if isinstance(data, H2OFrame):
        return H2OFrame(
            vecs=[_simple_un_math_op(op, vec) for vec in data._vecs])
    if isinstance(data, H2OVec):
        return H2OVec(data._name, Expr(op, left=data, length=len(data)))
    # Call-style raise works on Python 2 and 3; the old "raise E, msg" form is a
    # syntax error on Python 3.
    raise ValueError(op + " only operates on H2OFrame or H2OVec objects")
示例#6
0
文件: h2o.py 项目: yuecong/h2o-3
def parse_raw(setup, id=None, first_line_is_header=(-1,0,1)):
  """
  Used in conjunction with import_file and parse_setup in order to make alterations before parsing.

  :param setup: Result of h2o.parse_setup
  :param id: An optional id for the frame; a temporary key is generated when omitted.
  :param first_line_is_header: -1,0,1 if the first line is to be used as the header; handed to parse unchanged
  :return: An H2OFrame object
  """
  frame_id = H2OFrame.py_tmp_key() if id is None else id
  parse_result = parse(setup, frame_id, first_line_is_header)
  vec_ids = parse_result['vec_ids']
  nrows = parse_result['rows']
  names = parse_result['column_names']
  if not names:
    # No column names came back: fall back to C1, C2, ... (one per vec id).
    names = ["C" + str(i) for i in range(1, len(vec_ids) + 1)]
  return H2OFrame(vecs=H2OVec.new_vecs(zip(names, vec_ids), nrows))
示例#7
0
文件: h2o.py 项目: jethrotan/h2o-3
def rep_len(data, length_out):
  """
  Repeat `data` to a given length.

  A str or int is sent to the cluster as a Rapids `rep_len` call and the result
  is returned as a new H2OFrame. Any other object is expected to provide its own
  rep_len(length_out=...) method, whose result is returned as-is.

  :param data: a str, an int, or an object with a rep_len method
  :param length_out: the requested length, forwarded to Rapids / to data.rep_len
  :return: an H2OFrame for str/int input, otherwise whatever data.rep_len returns
  """
  if not isinstance(data, (str, int)):
    return data.rep_len(length_out=length_out)

  out_key = H2OFrame.py_tmp_key()
  # Ints are written as "#" literals, strings are wrapped in double quotes.
  if isinstance(data, int):
    scalar = '#{}'.format(data)
  else:
    scalar = '\"{}\"'.format(data)
  length_arg = '#{}'.format(length_out)
  rapids("(= !{} (rep_len {} {}))".format(out_key, scalar, length_arg))

  meta = frame(out_key)['frames'][0]
  nrows = meta['rows']
  vec_ids = meta['vec_ids']
  labels = [column['label'] for column in meta['columns']]
  new_vecs = H2OVec.new_vecs(zip(labels, vec_ids), nrows)
  removeFrameShallow(out_key)
  return H2OFrame(vecs=new_vecs)
示例#8
0
def parse_raw(setup, id=None, first_line_is_header=(-1, 0, 1)):
    """
    Parse a previously prepared setup into an H2OFrame.

    Used in conjunction with import_file and parse_setup in order to make
    alterations before parsing.

    :param setup: Result of h2o.parse_setup
    :param id: An optional id for the frame. When None, H2OFrame.py_tmp_key() supplies one.
    :param first_line_is_header: -1,0,1 if the first line is to be used as the header
    :return: An H2OFrame object
    """
    if id is None:
        id = H2OFrame.py_tmp_key()
    result = parse(setup, id, first_line_is_header)
    vec_keys = result['vec_ids']
    row_count = result['rows']
    # Use the parsed column names when present, otherwise synthesize C1..Cn.
    col_names = result['column_names'] or [
        "C" + str(idx + 1) for idx in range(len(vec_keys))
    ]
    return H2OFrame(vecs=H2OVec.new_vecs(zip(col_names, vec_keys), row_count))
示例#9
0
文件: h2o.py 项目: jethrotan/h2o-3
def ls():
  """
  List Keys on an H2O Cluster.

  Runs the Rapids `ls` call into a temporary key, wraps the result in an
  H2OFrame, prints a header and shows the frame, then converts it to a list.

  :return: Returns a list of keys in the current H2O instance (as_list(fr, use_pandas=False))
  """
  tmp_key = H2OFrame.py_tmp_key()
  expr = "(= !{} (ls ))".format(tmp_key)
  rapids(expr)
  j = frame(tmp_key)
  fr = j['frames'][0]
  rows = fr['rows']
  veckeys = fr['vec_ids']
  cols = fr['columns']
  colnames = [col['label'] for col in cols]
  vecs=H2OVec.new_vecs(zip(colnames, veckeys), rows)
  fr = H2OFrame(vecs=vecs)
  # Parenthesised single-argument print gives the same output on Python 2 and 3;
  # the bare print statement is a syntax error on Python 3.
  print("First 10 Keys: ")
  fr.show()
  return as_list(fr, use_pandas=False)
示例#10
0
def ls():
    """
    List Keys on an H2O Cluster.

    Runs the Rapids `ls` call into a temporary key, wraps the result in an
    H2OFrame whose column is renamed to "keys", prints a header and shows the
    frame, then converts it to a list.

    :return: Returns a list of keys in the current H2O instance (as_list(fr, use_pandas=False))
    """
    tmp_key = H2OFrame.py_tmp_key()
    expr = "(= !{} (ls ))".format(tmp_key)
    rapids(expr)
    j = frame(tmp_key)
    fr = j['frames'][0]
    rows = fr['rows']
    veckeys = fr['vec_ids']
    cols = fr['columns']
    colnames = [col['label'] for col in cols]
    vecs = H2OVec.new_vecs(zip(colnames, veckeys), rows)
    fr = H2OFrame(vecs=vecs)
    fr.setNames(["keys"])
    # Parenthesised single-argument print gives the same output on Python 2 and 3;
    # the bare print statement is a syntax error on Python 3.
    print("First 10 Keys: ")
    fr.show()
    return as_list(fr, use_pandas=False)
示例#11
0
def cbind(left, right):
    """
    Column-bind two H2O objects into a new H2OFrame.

    The vecs of both operands are collected in order (left first), joined with a
    Rapids `cbind` call, and the resulting frame is renamed with the original
    vec names.

    :param left: H2OFrame or H2OVec
    :param right: H2OFrame or H2OVec
    :return: new H2OFrame with left|right cbinded
    :raises ValueError: if left or right is neither an H2OFrame nor an H2OVec
    """
    # Flatten the operands into one ordered list of vecs. New lists are built
    # here so the operands' own _vecs lists are never modified.
    if isinstance(left, H2OFrame) and isinstance(right, H2OFrame):
        vecs = left._vecs + right._vecs
    elif isinstance(left, H2OFrame) and isinstance(right, H2OVec):
        vecs = list(left._vecs) + [right]
    elif isinstance(left, H2OVec) and isinstance(right, H2OVec):
        vecs = [left, right]
    elif isinstance(left, H2OVec) and isinstance(right, H2OFrame):
        vecs = [left] + list(right._vecs)
    else:
        raise ValueError("left and right data must be H2OVec or H2OFrame")
    names = [vec.name() for vec in vecs]

    tmp_key = H2OFrame.py_tmp_key()
    # Each vec is referenced in the expression by the value its expression's eager() returns.
    expr = "(= !" + tmp_key + " (cbind %FALSE %"
    expr += " %".join([vec._expr.eager() for vec in vecs]) + "))"
    rapids(expr)

    meta = frame(tmp_key)['frames'][0]
    rows = meta['rows']
    vec_ids = meta['vec_ids']
    colnames = [col['label'] for col in meta['columns']]
    result = H2OFrame(vecs=H2OVec.new_vecs(zip(colnames, vec_ids), rows))
    # Restore the operands' vec names on the bound frame.
    result.setNames(names)
    return result
示例#12
0
文件: h2o.py 项目: yuecong/h2o-3
def cbind(left,right):
  """
  Bind the columns of `left` and `right` side by side into a new H2OFrame.

  Both operands are flattened into a single ordered list of vecs (left first),
  sent through a Rapids `cbind` call, and the frame read back from the
  temporary key is given the original vec names.

  :param left: H2OFrame or H2OVec
  :param right: H2OFrame or H2OVec
  :return: new H2OFrame with left|right cbinded
  :raises ValueError: if left or right is neither an H2OFrame nor an H2OVec
  """
  # Check left and right data types and gather their vecs. Plain list
  # concatenation replaces the former append-only list comprehensions.
  if isinstance(left,H2OFrame) and isinstance(right,H2OFrame):
    vecs = left._vecs + right._vecs
  elif isinstance(left,H2OFrame) and isinstance(right,H2OVec):
    vecs = list(left._vecs) + [right]
  elif isinstance(left,H2OVec) and isinstance(right,H2OVec):
    vecs = [left, right]
  elif isinstance(left,H2OVec) and isinstance(right,H2OFrame):
    vecs = [left] + list(right._vecs)
  else:
    raise ValueError("left and right data must be H2OVec or H2OFrame")
  names = [vec.name() for vec in vecs]

  # A separate name for the expression avoids shadowing this function inside its own body.
  tmp_key = H2OFrame.py_tmp_key()
  rapids_expr = "(= !" + tmp_key + " (cbind %FALSE %"
  rapids_expr += " %".join([vec._expr.eager() for vec in vecs]) + "))"
  rapids(rapids_expr)

  frame_json = frame(tmp_key)['frames'][0]
  rows = frame_json['rows']
  vec_ids = frame_json['vec_ids']
  colnames = [col['label'] for col in frame_json['columns']]
  result = H2OFrame(vecs=H2OVec.new_vecs(zip(colnames, vec_ids), rows))
  result.setNames(names)
  return result