def levels(self, col=None):
    """
    Get the factor levels for this frame and the specified column index.

    :param col: A column index in this H2OFrame. It is ignored when the frame has exactly
        one column. When omitted on a multi-column frame, the `levels` expression is
        evaluated over the whole frame.
    :return: A flattened list of levels when the frame has one column or `col` is given;
        otherwise the nested row lists produced by `h2o.as_list`. None when the result
        is empty.
    """
    single_column = self.ncol == 1
    whole_frame = single_column or col is None

    # Either ask for the levels of the whole frame or of the selected column only.
    target = self if whole_frame else ExprNode("[", self, None, col)
    listing = h2o.as_list(H2OFrame(expr=ExprNode("levels", target))._frame(), False)
    # The first row of the listing is skipped (presumably a header row -- confirm).
    rows = listing[1:]

    # Only the multi-column, whole-frame request keeps the nested row structure.
    if single_column or not whole_frame:
        result = [level for row in rows for level in row]
    else:
        result = rows

    # An empty result is reported as None rather than as an empty list.
    return result or None
def list_timezones():
    """
    Get a list of all the time zones.

    Evaluates the `listTimeZones` expression.

    :return: The time zones, as an H2OFrame.
    """
    query = ExprNode("listTimeZones")
    zones = H2OFrame(expr=query)
    return zones._frame()
def get_timezone():
    """
    Get the time zone currently set on the H2O cloud.

    Evaluates the `getTimeZone` expression and extracts its scalar result.

    :return: The time zone (string).
    """
    query = ExprNode("getTimeZone")
    holder = H2OFrame(expr=query)
    return holder._scalar()
def tolower(self):
    """
    Translate characters from upper to lower case for a particular column.

    Wraps a `tolower` expression over this frame in a new H2OFrame.
    NOTE(review): the previous docstring stated that this mutates the frame; that is
    not visible in this method -- confirm against the backend.

    :return: H2OFrame
    """
    lowered = ExprNode("tolower", self)
    return H2OFrame(expr=lowered)
def strsplit(self, pattern):
    """
    Split the strings in the target column on the given pattern.

    :param pattern: The pattern to split on; passed through to the `strsplit` expression.
    :return: H2OFrame
    """
    split_expr = ExprNode("strsplit", self, pattern)
    return H2OFrame(expr=split_expr)
def ddply(self, cols, fun):
    """
    Apply a function to each group of rows defined by the grouping columns.

    :param cols: Column names used to control grouping.
    :param fun: Function to execute on each group. Right now limited to a textual
        Rapids expression.
    :return: New frame with 1 row per group, holding the results from `fun`.
    """
    grouped = ExprNode("ddply", self, cols, fun)
    result = H2OFrame(expr=grouped)
    return result._frame()
def gsub(self, pattern, replacement, ignore_case=False):
    """
    Replace matches of `pattern` with `replacement`.

    sub and gsub perform replacement of the first and all matches respectively.
    NOTE(review): the previous docstring stated that this mutates the frame; this
    method itself only wraps a `gsub` expression in a new H2OFrame -- confirm.

    :param pattern: The pattern to look for.
    :param replacement: The text that replaces each match.
    :param ignore_case: Passed through to the `gsub` expression (default False).
    :return: H2OFrame
    """
    # Operand order expected by the expression: pattern, replacement, frame, flag.
    substitution = ExprNode("gsub", pattern, replacement, self, ignore_case)
    return H2OFrame(expr=substitution)
def ls():
    """
    List keys on an H2O cluster.

    Evaluates the `ls` expression and converts the resulting frame with
    `as_data_frame()`.

    :return: The keys in the current H2O instance.
    """
    listing = H2OFrame(expr=ExprNode("ls"))
    keys_frame = listing._frame()
    return keys_frame.as_data_frame()
def var(self, y=None, na_rm=False, use="everything"):
    """
    Compute the covariance matrix of the columns in this H2OFrame.

    :param y: Passed through as the second operand of the `var` expression (default None).
    :param na_rm: True or False to remove NAs from computation.
    :param use: One of "everything", "complete.obs", or "all.obs".
    :return: The covariance matrix of the columns in this H2OFrame.
    """
    variance = ExprNode("var", self, y, na_rm, use)
    holder = H2OFrame(expr=variance)
    return holder._get()
def unique(self):
    """
    Extract the unique values in the column.

    :return: A new H2OFrame of just the unique values in the column.
    """
    distinct = ExprNode("unique", self)
    result = H2OFrame(expr=distinct)
    return result._frame()
def mktime(year=1970, month=0, day=0, hour=0, minute=0, second=0, msec=0):
    """
    Build a timestamp from its individual date and time components.

    All units are zero-based (including months and days). Missing year is 1970.

    :param year: Year component (default 1970).
    :param month: Month component (default 0).
    :param day: Day component (default 0).
    :param hour: Hour component (default 0).
    :param minute: Minute component (default 0).
    :param second: Second component (default 0).
    :param msec: Millisecond component (default 0).
    :return: Msec since the Epoch.
    """
    components = (year, month, day, hour, minute, second, msec)
    timestamp = ExprNode("mktime", *components)
    return H2OFrame(expr=timestamp)._frame()
def filterNACols(self, frac=0.2):
    """
    Filter columns with a proportion of NAs >= frac.

    :param frac: Fraction of NAs in the column (default 0.2).
    :return: A list of column indices.
    """
    na_filter = ExprNode("filterNACols", self, frac)
    filtered = H2OFrame(expr=na_filter)
    return filtered._frame()
def match(self, table, nomatch=0):
    """
    Make a vector of the positions of (first) matches of its first argument in its second.

    :param table: The values to match against.
    :param nomatch: Passed through to the `match` expression (default 0).
    :return: An H2OFrame wrapping the `match` expression.
    """
    # The expression is built with a trailing None operand after `nomatch`.
    lookup = ExprNode("match", self, table, nomatch, None)
    return H2OFrame(expr=lookup)
def trim(self):
    """
    Trim the edge-spaces in a column of strings.

    Only operates on a frame with one column.

    :return: H2OFrame
    """
    trimmed = ExprNode("trim", self)
    return H2OFrame(expr=trimmed)
def set_timezone(tz):
    """
    Set the time zone on the H2O cloud.

    Eagerly evaluates a `setTimeZone` expression and submits it through `h2o.rapids`,
    the same module-qualified call the other eager helpers in this file use.

    :param tz: The desired timezone.
    :return: None
    """
    request = ExprNode("setTimeZone", tz)
    # Qualified with the `h2o` module so the call does not depend on a bare `rapids`
    # name being imported into this module.
    h2o.rapids(request._eager())
def mult(self, matrix):
    """
    Perform matrix multiplication.

    Wraps the `x` expression, with this frame as first operand and `matrix` as second.

    :param matrix: The matrix to multiply to the left of self.
    :return: The multiplied matrices.
    """
    product = ExprNode("x", self, matrix)
    return H2OFrame(expr=product)
def as_date(self, format):
    """
    Return the column with all elements converted to millis since the epoch.

    :param format: The date time format string.
    :return: H2OFrame
    """
    conversion = ExprNode("as.Date", self, format)
    return H2OFrame(expr=conversion)
def rep_len(self, length_out):
    """
    Replicate the values of this frame in the H2O backend.

    :param length_out: The number of columns of the resulting H2OFrame.
    :return: An H2OFrame.
    """
    replicated = ExprNode("rep_len", self, length_out)
    return H2OFrame(expr=replicated)
def setLevel(self, level):
    """
    Set all column values to one of the levels.

    :param level: The level at which the column will be set (a string).
    :return: An H2OFrame with all entries set to the desired level.
    """
    assignment = ExprNode("setLevel", self, level)
    result = H2OFrame(expr=assignment)
    return result._frame()
def scale(self, center=True, scale=True):
    """
    Center and/or scale the columns of the H2OFrame.

    :param center: Either a logical value or a numeric list of length equal to the
        number of columns of the H2OFrame.
    :param scale: Either a logical value or a numeric list of length equal to the
        number of columns of the H2OFrame.
    :return: H2OFrame
    """
    scaling = ExprNode("scale", self, center, scale)
    return H2OFrame(expr=scaling)
def rbind(self, data):
    """
    Combine H2O datasets by rows.

    :param data: An H2OFrame to append row-wise to this one.
    :return: An H2OFrame wrapping the `rbind` expression of self and `data`.
    :raises ValueError: If `data` is not an H2OFrame.
    """
    if isinstance(data, H2OFrame):
        stacked = ExprNode("rbind", self, data)
        return H2OFrame(expr=stacked)
    raise ValueError("`data` must be an H2OFrame, but got {0}".format(type(data)))
def drop(self, i):
    """
    Return a frame with the column at index i dropped.

    :param i: Column to drop; a column name is first resolved with `_find_idx`.
    :return: An H2OFrame.
    """
    index = self._find_idx(i) if isinstance(i, basestring) else i
    # The column selector is the negated, one-based form of the index
    # (presumably the backend's notation for excluding a column -- confirm).
    selector = ExprNode("[", self, None, -(index + 1))
    remaining = H2OFrame(expr=selector)
    return remaining._frame()
def setNames(self, names):
    """
    Change the column names to `names`.

    Eagerly evaluates a `colnames=` expression over every column index of this frame,
    then refreshes this frame via `_update()`.

    :param names: A list of strings equal to the number of columns in the H2OFrame.
    :return: This H2OFrame, after renaming.
    """
    every_column = range(self.ncol)
    rename = ExprNode("colnames=", self, every_column, names)
    h2o.rapids(rename._eager())
    self._update()
    return self
def merge(self, other, allLeft=False, allRite=False):
    """
    Merge two datasets based on common column names.

    :param other: Other dataset to merge. Must have at least one column in common with
        self, and all columns in common are used as the merge key. If you want to use
        only a subset of the columns in common, rename the other columns so the columns
        are unique in the merged result.
    :param allLeft: If true, include all rows from the left/self frame.
    :param allRite: If true, include all rows from the right/other frame.
    :return: Original self frame enhanced with merged columns and rows.
    """
    joined = ExprNode("merge", self, other, allLeft, allRite)
    result = H2OFrame(expr=joined)
    return result._frame()
def quantile(self, prob=None, combine_method="interpolate"):
    """
    Compute quantiles over a given H2OFrame.

    :param prob: A list of probabilities; when empty or omitted the default is
        [0.01,0.1,0.25,0.333,0.5,0.667,0.75,0.9,0.99]. You may provide any sequence
        of any length.
    :param combine_method: For even samples, how to combine quantiles. Should be one of
        ["interpolate", "average", "low", "hi"].
    :return: An H2OFrame containing the quantiles and probabilities; a frame of
        length zero is returned unchanged.
    """
    size = len(self)
    if size == 0:
        # Nothing to compute on an empty frame; hand it straight back.
        return self

    probabilities = prob if prob else [0.01, 0.1, 0.25, 0.333, 0.5, 0.667, 0.75, 0.9, 0.99]
    quantiles = ExprNode("quantile", self, probabilities, combine_method)
    return H2OFrame(expr=quantiles)._frame()
def pop(self, i):
    """
    Pop a column out of an H2OFrame.

    :param i: The index or name of the column to pop; a name is resolved with `_find_idx`.
    :return: The column dropped from the frame.
    """
    position = self._find_idx(i) if isinstance(i, basestring) else i
    popped = H2OFrame(expr=ExprNode("pop", self, position))._frame()
    # Refresh this frame after the column has been popped.
    self._update()
    return popped
def remove_vecs(self, cols):
    """
    Drop the given columns from this frame.

    The frame is evaluated eagerly first. When every entry of `cols` is a string,
    the names are resolved to indices with `_find_idx`; the entries are then sorted
    before being handed to the `removeVecs` expression.

    :param cols: Drop these columns.
    :return: A frame with the columns dropped.
    """
    self._eager()
    string_flags = [isinstance(entry, basestring) for entry in cols]
    if all(string_flags):
        cols = [self._find_idx(name) for name in cols]
    ordered = sorted(cols)
    removal = ExprNode("removeVecs", self, ordered)
    return H2OFrame(expr=removal)._frame()
def ifelse(test, yes, no):
    """
    Semantically equivalent to R's ifelse.

    Based on the booleans in the test vector, the output has the values of the yes and
    no vectors interleaved (or merged together).

    :param test: A "test" H2OFrame.
    :param yes: A "yes" H2OFrame.
    :param no: A "no" H2OFrame.
    :return: An H2OFrame.
    """
    selection = ExprNode("ifelse", test, yes, no)
    result = H2OFrame(expr=selection)
    return result._frame()
def cut(self, breaks, labels=None, include_lowest=False, right=True, dig_lab=3):
    """
    Cut a numeric vector into factor "buckets". Similar to R's cut method.

    :param breaks: The cut points in the numeric vector (must span the range of the col.)
    :param labels: Factor labels, defaults to set notation of intervals defined by breaks.
    :param include_lowest: By default, cuts are defined as (lo,hi]. If True, get [lo,hi].
    :param right: Include the high value: (lo,hi]. If False, get (lo,hi).
    :param dig_lab: Number of digits following the decimal point to consider.
    :return: A factor column.
    """
    options = (breaks, labels, include_lowest, right, dig_lab)
    bucketing = ExprNode("cut", self, *options)
    return H2OFrame(expr=bucketing)
def setName(self, col=None, name=None):
    """
    Set the name of the column at the specified index.

    :param col: Index of the column whose name is to be set. For a single-column frame
        the value is ignored and index 0 is used.
    :param name: The new name of the column to set.
    :return: The input frame.
    :raises ValueError: If `col` is not an int and the frame has more than one column.
    """
    col_is_index = isinstance(col, int)
    if not col_is_index and self.ncol > 1:
        raise ValueError("`col` must be an index. Got: " + str(col))

    # A one-column frame has only one possible target.
    target = 0 if self.ncol == 1 else col
    rename = ExprNode("colnames=", self, target, name)
    h2o.rapids(rename._eager())
    self._update()
    return self