def topk(self, k):
    """Run the word-count top-k operation on the backend and return its result.

    Args:
        k: Number of entries requested; it is sent to the backend as
            ``str(k)``.

    Returns:
        Whatever ``scheduler.compute_collect`` yields for the
        ``Word#wordcount_topk_py`` operation.

    Raises:
        AssertionError: If ``self._computed`` does not compare equal to
            ``True``.
    """
    assert self._computed == True, "You haven't computed wordcount"
    request = Operation(
        "Word#wordcount_topk_py",
        {
            "k": str(k),
            OperationParam.list_str: self.list_name,
            "Type": "cpp",
        },
        [],  # no dependencies on other operations
    )
    return scheduler.compute_collect(request)
def topk_pagerank(self, k):
    """Run the PageRank top-k operation on the backend and return its result.

    Args:
        k: Number of entries requested; it is sent to the backend as
            ``str(k)``.

    Returns:
        Whatever ``scheduler.compute_collect`` yields for the
        ``Graph#pagerank_topk_py`` operation.

    Raises:
        AssertionError: If ``self._computed`` does not compare equal to
            ``True``.
    """
    assert self._computed == True, "You haven't computed Pagerank"
    request = Operation(
        "Graph#pagerank_topk_py",
        {
            "k": str(k),
            OperationParam.list_str: self.list_name,
            "Type": "cpp",
        },
        [],  # no dependencies on other operations
    )
    return scheduler.compute_collect(request)
def reduce(self, func):
    """Submit a reduce operation using ``func`` and return the collected result.

    Args:
        func: The reducing callable. It is shipped to the backend under
            ``OperationParam.lambda_str``.

    Returns:
        The value of ``compute_collect`` for the
        ``Functional#reduce_py`` operation, or ``NotImplemented`` when
        ``func`` has no ``__call__`` attribute.
    """
    # Guard clause: anything without __call__ is rejected up front.
    if not hasattr(func, '__call__'):
        return NotImplemented

    reduce_op = Operation(
        "Functional#reduce_py",
        {
            OperationParam.lambda_str: func,
            OperationParam.list_str: self.list_name,
        },
        [self.pending_op],  # chain onto the operation pending for this list
    )
    return compute_collect(reduce_op)
def train(self, n_iter=10, alpha=0.1):
    """Train the SVM model on the backend and store the fitted values.

    The collected result is split so that every element except the last
    becomes ``self.param`` (as a NumPy array) and the last element becomes
    ``self.intercept``.

    Args:
        n_iter: Iteration count; must be an ``int``. Sent as ``str(n_iter)``.
        alpha: Must be a ``float`` (an ``int`` is rejected). Sent as
            ``str(alpha)``. Presumably the learning rate -- confirm against
            the backend.

    Raises:
        AssertionError: If ``self.loaded`` is falsy, or if ``n_iter`` /
            ``alpha`` have the wrong type.
    """
    assert self.loaded
    assert isinstance(n_iter, int)
    assert isinstance(alpha, float)

    train_op = Operation(
        "SVMModel#SVM_train_py",
        {
            "n_iter": str(n_iter),
            "alpha": str(alpha),
            OperationParam.list_str: self.list_name,
            "Type": "cpp",
        },
        [],  # no dependencies on other operations
    )
    fitted = scheduler.compute_collect(train_op)

    # All but the last collected value are the parameters; the last one is
    # the intercept.
    self.param = np.array(fitted[:-1])
    self.intercept = fitted[-1]

    # State flags after a successful run.
    self.loaded = False
    self.trained = True
def train(self, n_iter=10, alpha=0.1, is_sparse=1):
    """Train the logistic regression model on the backend and store the result.

    A new training operation replaces ``self.pending_op``. Its dependency
    list is printed, the operation is collected, and the result is split so
    that every element except the last becomes ``self.param`` (as a NumPy
    array) and the last element becomes ``self.intercept``.

    Args:
        n_iter: Iteration count; must be an ``int``. Sent as ``str(n_iter)``.
        alpha: Must be a ``float`` (an ``int`` is rejected). Sent as
            ``str(alpha)``. Presumably the learning rate -- confirm against
            the backend.
        is_sparse: Forwarded to the backend as ``str(is_sparse)``.

    Raises:
        AssertionError: If ``self.loaded`` is falsy, or if ``n_iter`` /
            ``alpha`` have the wrong type.
    """
    assert self.loaded
    assert isinstance(n_iter, int)
    assert isinstance(alpha, float)

    train_param = {
        "n_iter": str(n_iter),
        "alpha": str(alpha),
        OperationParam.list_str: self.list_name,
        "is_sparse": str(is_sparse),
        "Type": "cpp",
    }

    # An already-trained model depends on a fresh init operation; otherwise
    # the new operation chains onto whatever is currently pending.
    if self.trained:
        deps = [Operation("LogisticRegressionModel#LogisticR_init_py",
                          {"Type": "cpp"})]
    else:
        deps = [self.pending_op]

    self.pending_op = Operation(
        "LogisticRegressionModel#LogisticR_train_py", train_param, deps)

    # Single-argument call form prints the same text on Python 2 and 3; the
    # former print statement was a SyntaxError on Python 3.
    print(self.pending_op.op_deps)

    paramlist = scheduler.compute_collect(self.pending_op)

    # All but the last collected value are the parameters; the last one is
    # the intercept.
    self.param = np.array(paramlist[:-1])
    self.intercept = paramlist[-1]
    self.trained = True
def print_all(self):
    """Run the word-count print operation on the backend and return its result.

    Returns:
        Whatever ``scheduler.compute_collect`` yields for the
        ``Word#wordcount_print_py`` operation.

    Raises:
        AssertionError: If ``self._computed`` does not compare equal to
            ``True``.
    """
    assert self._computed == True, "You haven't computed wordcount"
    request = Operation(
        "Word#wordcount_print_py",
        {
            OperationParam.list_str: self.list_name,
            "Type": "cpp",
        },
        [],  # no dependencies on other operations
    )
    return scheduler.compute_collect(request)
def collect(self):
    """Submit a collect operation for this list and return the result.

    The operation depends on ``self.pending_op`` and carries only the list
    name as its parameter.

    Returns:
        The value of ``compute_collect`` for the
        ``Functional#collect_py`` operation.
    """
    collect_op = Operation(
        "Functional#collect_py",
        {OperationParam.list_str: self.list_name},
        [self.pending_op],  # chain onto the operation pending for this list
    )
    return compute_collect(collect_op)