Пример #1
0
  def levels(self, col=None):
    """
    Get the factor levels for this frame, or for a single column of it.

    :param col: A column index in this H2OFrame. Not used when the frame has exactly one
      column; when omitted on a multi-column frame, levels are requested for the whole frame.
    :return: When a single column is described (a one-column frame, or an explicit `col`),
      a flat list of the level values. For a multi-column frame with no `col`, the
      unflattened list of lists produced by `h2o.as_list`. None when that list is empty.
    """
    single = self.ncol==1
    whole_frame = single or col is None
    # Query the whole frame, or slice out the requested column first.
    target = self if whole_frame else ExprNode("[", self, None, col)
    # The first entry returned by as_list is skipped (presumably a header row -- confirm).
    rows = h2o.as_list(H2OFrame(expr=ExprNode("levels", target))._frame(), False)[1:]
    # Flatten the list of lists whenever exactly one column is being described.
    if single or col is not None:
      rows = [level for row in rows for level in row]
    # An empty result is reported as None rather than [].
    return rows or None
Пример #2
0
def list_timezones():
  """
  List all the time zones known to the H2O backend.

  :return: the result of the "listTimeZones" expression (as an H2OFrame)
  """
  tz_expr = ExprNode("listTimeZones")
  return H2OFrame(expr=tz_expr)._frame()
Пример #3
0
def get_timezone():
  """
  Look up the time zone currently configured on the H2O Cloud.

  :return: the scalar result of the "getTimeZone" expression (the time zone, as a string)
  """
  tz_expr = ExprNode("getTimeZone")
  return H2OFrame(expr=tz_expr)._scalar()
Пример #4
0
 def tolower(self):
   """
   Convert upper-case characters to lower case for a particular column.

   NOTE(review): reported to mutate the frame on the backend -- confirm.
   :return: An H2OFrame wrapping the "tolower" expression over this frame.
   """
   lowered = ExprNode("tolower", self)
   return H2OFrame(expr=lowered)
Пример #5
0
  def strsplit(self, pattern):
    """
    Split the strings of the target column wherever `pattern` occurs.

    :param pattern: The pattern to split on; passed through to the "strsplit" expression.
    :return: An H2OFrame wrapping the "strsplit" expression.
    """
    split_expr = ExprNode("strsplit", self, pattern)
    return H2OFrame(expr=split_expr)
Пример #6
0
 def ddply(self,cols,fun):
   """
   Apply `fun` to each group of rows defined by `cols`.

   :param cols: Column names used to control grouping
   :param fun: Function to execute on each group; currently limited to a textual Rapids expression
   :return: New frame with one row per group, holding the results of `fun`
   """
   grouped = ExprNode("ddply", self, cols, fun)
   return H2OFrame(expr=grouped)._frame()
Пример #7
0
 def gsub(self, pattern, replacement, ignore_case=False):
   """
   Replace all matches of `pattern` with `replacement` (sub replaces only the first match,
   gsub replaces every match).

   NOTE(review): reported to mutate the frame on the backend -- confirm.
   :param pattern: The pattern to search for.
   :param replacement: The text substituted for each match.
   :param ignore_case: Passed through to the "gsub" expression as its last argument.
   :return: An H2OFrame wrapping the "gsub" expression.
   """
   # The expression takes the frame as its third argument, after pattern and replacement.
   substituted = ExprNode("gsub", pattern, replacement, self, ignore_case)
   return H2OFrame(expr=substituted)
Пример #8
0
def ls():
  """
  List the keys held by an H2O cluster.

  :return: The keys in the current H2O instance, converted via `as_data_frame()`
  """
  keys_frame = H2OFrame(expr=ExprNode("ls"))._frame()
  return keys_frame.as_data_frame()
Пример #9
0
 def var(self,y=None,na_rm=False,use="everything"):
   """
   Compute the covariance matrix of the columns in this H2OFrame.

   :param y: Optional second argument forwarded to the "var" expression
     (presumably another frame to compute covariance against -- confirm).
   :param na_rm: True or False to remove NAs from computation.
   :param use: One of "everything", "complete.obs", or "all.obs".
   :return: The covariance matrix of the columns in this H2OFrame.
   """
   cov_expr = ExprNode("var", self,y,na_rm,use)
   return H2OFrame(expr=cov_expr)._get()
Пример #10
0
  def unique(self):
    """
    Pull out the distinct values found in the column.

    :return: A new H2OFrame holding only the unique values of the column.
    """
    distinct = ExprNode("unique", self)
    return H2OFrame(expr=distinct)._frame()
Пример #11
0
  def mktime(year=1970,month=0,day=0,hour=0,minute=0,second=0,msec=0):
    """
    Build a timestamp from its components.

    Every unit is zero-based (months and days included). A missing year defaults to 1970.

    :return: Returns msec since the Epoch.
    """
    parts = (year,month,day,hour,minute,second,msec)
    return H2OFrame(expr=ExprNode("mktime", *parts))._frame()
Пример #12
0
 def filterNACols(self, frac=0.2):
   """
   Filter columns whose proportion of NAs is >= frac.

   :param frac: Fraction of NAs in the column.
   :return: The result of the "filterNACols" expression (described upstream as a list of
     column indices).
   """
   na_filter = ExprNode("filterNACols", self, frac)
   return H2OFrame(expr=na_filter)._frame()
Пример #13
0
  def match(self, table, nomatch=0):
    """
    Produce a vector with the positions of the (first) matches of this frame's values in `table`.

    :param table: The values to be matched against.
    :param nomatch: Forwarded to the "match" expression (presumably the value used when
      nothing matches -- confirm).
    :return: An H2OFrame wrapping the "match" expression.
    """
    # The trailing None fills the expression's final argument slot.
    match_expr = ExprNode("match", self, table, nomatch, None)
    return H2OFrame(expr=match_expr)
Пример #14
0
  def trim(self):
    """
    Strip the leading and trailing spaces in a column of strings (only operates on a frame
    with one column).

    :return: An H2OFrame wrapping the "trim" expression.
    """
    trimmed = ExprNode("trim", self)
    return H2OFrame(expr=trimmed)
Пример #15
0
def set_timezone(tz):
  """
  Change the time zone used by the H2O Cloud.

  :param tz: The desired timezone.
  :return: None
  """
  tz_expr = ExprNode("setTimeZone", tz)
  # The result of the rapids call is intentionally discarded.
  rapids(tz_expr._eager())
Пример #16
0
  def mult(self, matrix):
    """
    Matrix-multiply this frame with `matrix`.

    :param matrix: The other operand; passed as the second argument of the "x" expression,
      with this frame as the first.
    :return: An H2OFrame wrapping the product expression.
    """
    product = ExprNode("x", self, matrix)
    return H2OFrame(expr=product)
Пример #17
0
  def as_date(self,format):
    """
    Convert every element of the column to milliseconds since the epoch.

    :param format: The date time format string
    :return: An H2OFrame wrapping the "as.Date" expression.
    """
    date_expr = ExprNode("as.Date",self,format)
    return H2OFrame(expr=date_expr)
Пример #18
0
  def rep_len(self, length_out):
    """
    Replicate the values of this frame in the H2O backend.

    :param length_out: the number of columns of the resulting H2OFrame
    :return: An H2OFrame wrapping the "rep_len" expression.
    """
    repeated = ExprNode("rep_len", self, length_out)
    return H2OFrame(expr=repeated)
Пример #19
0
  def setLevel(self, level):
    """
    Set every value of the column to a single one of its levels.

    :param level: The level at which the column will be set (a string)
    :return: An H2OFrame with all entries set to the desired level
    """
    leveled = ExprNode("setLevel", self, level)
    return H2OFrame(expr=leveled)._frame()
Пример #20
0
  def scale(self, center=True, scale=True):
    """
    Center and/or scale the columns of the H2OFrame.

    :param center: either a logical value or a numeric list whose length equals the number
      of columns of the H2OFrame
    :param scale: either a logical value or a numeric list whose length equals the number
      of columns of the H2OFrame
    :return: An H2OFrame wrapping the "scale" expression.
    """
    scaled = ExprNode("scale", self, center, scale)
    return H2OFrame(expr=scaled)
Пример #21
0
 def rbind(self, data):
   """
   Combine two H2O datasets row-wise.

   :param data: an H2OFrame whose rows are appended to those of this frame
   :return: An H2OFrame wrapping the "rbind" expression of this frame and `data`.
   :raises ValueError: if `data` is not an H2OFrame.
   """
   if isinstance(data, H2OFrame):
     return H2OFrame(expr=ExprNode("rbind", self, data))
   # Anything other than an H2OFrame is rejected up front.
   raise ValueError("`data` must be an H2OFrame, but got {0}".format(type(data)))
Пример #22
0
  def drop(self, i):
    """
    Build a frame that lacks the column at index `i`.

    :param i: Column to drop, as an index or (resolved through `_find_idx`) a name
    :return: Returns an H2OFrame
    """
    col_idx = self._find_idx(i) if isinstance(i, basestring) else i
    # The column selector is negated and shifted by one: -(idx+1).
    remaining = ExprNode("[", self, None,-(col_idx+1))
    return H2OFrame(expr=remaining)._frame()
Пример #23
0
  def setNames(self,names):
    """
    Rename all the columns of this H2OFrame to `names`.

    :param names: A list of strings equal to the number of columns in the H2OFrame.
    :return: This H2OFrame, after the rename has been sent and `_update()` called.
    """
    all_cols = range(self.ncol)
    rename = ExprNode("colnames=", self, all_cols, names)
    h2o.rapids(rename._eager())
    self._update()
    return self
Пример #24
0
  def merge(self, other, allLeft=False, allRite=False):
    """
    Merge two datasets on the column names they share.

    :param other: Other dataset to merge. It must share at least one column with self, and
      every shared column is used as part of the merge key. To merge on only a subset of the
      shared columns, rename the others so that they are unique in the merged result.
    :param allLeft: If true, include all rows from the left/self frame
    :param allRite: If true, include all rows from the right/other frame
    :return: Original self frame enhanced with merged columns and rows
    """
    merged = ExprNode("merge", self, other, allLeft, allRite)
    return H2OFrame(expr=merged)._frame()
Пример #25
0
  def quantile(self, prob=None, combine_method="interpolate"):
    """
    Compute quantiles over this H2OFrame.

    :param prob: A list of probabilities; when empty or omitted,
      [0.01,0.1,0.25,0.333,0.5,0.667,0.75,0.9,0.99] is used. Any sequence of any length is accepted.
    :param combine_method: For even samples, how to combine quantiles. Should be one of
      ["interpolate", "average", "low", "hi"]
    :return: an H2OFrame containing the quantiles and probabilities; the frame itself when
      its length is 0.
    """
    # Nothing to compute for an empty frame: hand it straight back.
    if len(self) == 0:
      return self
    probs = prob if prob else [0.01,0.1,0.25,0.333,0.5,0.667,0.75,0.9,0.99]
    q_expr = ExprNode("quantile",self,probs,combine_method)
    return H2OFrame(expr=q_expr)._frame()
Пример #26
0
  def pop(self,i):
    """
    Pop a column out of this H2OFrame.

    :param i: The index or name of the column to pop (names are resolved through `_find_idx`).
    :return: The column dropped from the frame.
    """
    idx = self._find_idx(i) if isinstance(i, basestring) else i
    popped = H2OFrame(expr=ExprNode("pop",self,idx))._frame()
    # Refresh this frame after the pop before returning the extracted column.
    self._update()
    return popped
Пример #27
0
 def remove_vecs(self, cols):
   """
   Drop a set of columns from this frame.

   :param cols: The columns to drop, given as column names, column indices, or a mix of both.
   :return: A frame with the columns dropped.
   """
   self._eager()
   # Resolve each name on its own, so a list mixing names and indices is handled too.
   # (Names used to be translated only when *every* entry was a string.)
   idxs = sorted(self._find_idx(c) if isinstance(c, basestring) else c for c in cols)
   return H2OFrame(expr=ExprNode("removeVecs",self,idxs))._frame()
Пример #28
0
def ifelse(test,yes,no):
  """
  Semantically equivalent to R's ifelse.

  Driven by the booleans in the test vector, the output interleaves (merges together) the
  values of the yes and no vectors.

  :param test: A "test" H2OFrame
  :param yes:  A "yes" H2OFrame
  :param no:   A "no"  H2OFrame
  :return: An H2OFrame
  """
  chosen = ExprNode("ifelse",test,yes,no)
  return H2OFrame(expr=chosen)._frame()
Пример #29
0
  def cut(self, breaks, labels=None, include_lowest=False, right=True, dig_lab=3):
    """
    Bucket a numeric vector into factor levels, much like R's cut.

    :param breaks: The cut points in the numeric vector (must span the range of the col.)
    :param labels: Factor labels; defaults to set notation of the intervals defined by breaks.
    :param include_lowest: By default, cuts are defined as (lo,hi]. If True, get [lo,hi].
    :param right: Include the high value: (lo,hi]. If False, get (lo,hi).
    :param dig_lab: Number of digits following the decimal point to consider.
    :return: A factor column.
    """
    bucketed = ExprNode("cut",self,breaks,labels,include_lowest,right,dig_lab)
    return H2OFrame(expr=bucketed)
Пример #30
0
  def setName(self,col=None,name=None):
    """
    Rename the column at the given index.

    :param col: Index of the column whose name is to be set. Replaced by 0 when the frame
      has exactly one column.
    :param name: The new name of the column to set
    :return: the input frame
    :raises ValueError: if `col` is not an int and the frame has more than one column.
    """
    if not isinstance(col, int):
      if self.ncol > 1:
        raise ValueError("`col` must be an index. Got: " + str(col))
    # A one-column frame always targets its only column.
    target = 0 if self.ncol == 1 else col
    rename = ExprNode("colnames=", self, target, name)
    h2o.rapids(rename._eager())
    self._update()
    return self