def apply(self, columns=None, mapper=None, new_column=None, lazy=True, drop=False, dillify=False):
    """Run the active node's ``apply`` and make the resulting node active.

    ``new_column``, ``mapper`` and ``drop`` may arrive wrapped in a list or
    tuple; in that case they are unwrapped with ``one`` before use.
    ``drop`` may also arrive as the string ``'true'`` / ``'false'`` (any
    letter case) and is converted to the matching bool.

    Args:
        columns: Passed through unchanged to the active node.
        mapper: Mapper identifier, optionally wrapped in a list/tuple.
        new_column: Name of the column to create, optionally wrapped.
        lazy: Passed through unchanged to the active node.
        drop: Bool, or the strings 'true'/'false', optionally wrapped.
        dillify: Passed through unchanged to the active node.

    Raises:
        RuntimeError: If ``drop`` is a string other than 'true'/'false'.
        AssertionError: If ``drop`` is neither a string nor a bool.
    """
    if isinstance(new_column, (list, tuple)):
        new_column = one(new_column)
    if isinstance(mapper, (list, tuple)):
        mapper = one(mapper)
    if isinstance(drop, (list, tuple)):
        drop = one(drop)

    if isinstance(drop, (str, unicode)):
        drop = drop.lower()
        if drop == 'true':
            drop = True
        elif drop == 'false':
            drop = False
        else:
            raise RuntimeError('drop string not recognized as bool')
    else:
        # Kept as an assertion so the exception type seen by callers is
        # unchanged; the message makes the failure self-explanatory.
        assert isinstance(drop, bool), 'drop must be a bool or a true/false string'

    new_node = self.active.apply(
        columns=columns,
        mapper=mapper,
        new_column=new_column,
        lazy=lazy,
        drop=drop,
        dillify=dillify)
    self.model.set_active(new_node)
def read(self, query=None, filename=None, uri=None, password=None):
    """Run one or more queries against ``uri`` and activate the resulting node.

    Query text is collected from ``query`` (a string or a list/tuple of
    strings) and from the contents of ``filename`` (a path or a list/tuple
    of paths). Each collected query is executed with
    ``read_file_query_uri`` and wrapped in a ``NodeFrame``; all frames are
    put in a single node created under the model root, which then becomes
    the active node.

    Args:
        query: Query string, list/tuple of query strings, or None.
        filename: Path, list/tuple of paths whose contents are queries, or
            None.
        uri: Connection URI, optionally wrapped in a list/tuple.
        password: Optional password (optionally wrapped in a list/tuple).
            When given, the URI is rebuilt with this password inserted.

    Raises:
        RuntimeError: If ``query`` or ``filename`` has an unsupported type.
    """
    all_query_list = []

    if isinstance(query, (list, tuple)):
        all_query_list.extend(query)
    elif isinstance(query, (str, unicode)):
        all_query_list.append(str(query))
    elif query is not None:
        raise RuntimeError('query must be a string, a list/tuple or None')

    # Files are read inside ``with`` so the handles are closed promptly
    # instead of being left to the garbage collector.
    if isinstance(filename, (list, tuple)):
        for fn in filename:
            with open(fn, 'r') as query_file:
                all_query_list.append(query_file.read())
    elif isinstance(filename, (str, unicode)):
        with open(filename, 'r') as query_file:
            all_query_list.append(str(query_file.read()))
    elif filename is not None:
        raise RuntimeError('filename must be a string, a list/tuple or None')

    if isinstance(uri, (list, tuple)):
        uri = one(uri)

    if password is not None:
        if isinstance(password, (list, tuple)):
            password = one(password)
        url_obj = urlparse.urlparse(uri)
        # Only append the port when the URI actually carries one; formatting
        # a missing port directly would yield the literal text "host:None".
        host = url_obj.hostname
        if url_obj.port is not None:
            host = '%s:%s' % (host, url_obj.port)
        # NOTE(review): the password is inserted verbatim (not URL-quoted) and
        # any query string/fragment of the original URI is not carried over.
        uri_pwd = '%s://%s:%s@%s%s' % (
            url_obj.scheme, url_obj.username, password, host, url_obj.path)
    else:
        uri_pwd = uri

    node_frame_list = []
    for q in all_query_list:
        df, load_time = read_file_query_uri(query=q, uri=uri_pwd)
        # NOTE(review): metadata records the ``query`` argument as passed in,
        # not the individual query ``q`` that produced this frame, and the
        # stored URI includes the password -- confirm both are intended.
        node_frame = NodeFrame(
            df=df,
            load_time=load_time,
            metadata={
                'query': query,
                'uri': uri_pwd
            })
        node_frame_list.append(node_frame)

    node = self.controller.create_node(
        tuple(node_frame_list), parent=self.model.root)
    self.model.set_active(node)
def transpose(self, index=None):
    """Transpose the active node and make the result the active node.

    ``index`` may be wrapped in a list or tuple, in which case it is
    unwrapped with ``one`` before being handed to the active node.
    """
    index_arg = one(index) if isinstance(index, (list, tuple)) else index
    transposed = self.active.transpose(index=index_arg)
    self.model.set_active(transposed)
def query(self, query=None):
    """Run ``query`` on the active node and make the result the active node.

    A list- or tuple-wrapped ``query`` is unwrapped with ``one`` first.
    """
    query_arg = one(query) if isinstance(query, (list, tuple)) else query
    result_node = self.active.query(query=query_arg)
    self.model.set_active(result_node)
def merge(self, against=None, hinge_uuid=None, how='inner'):
    """Merge the active node with ``against`` and activate the merged node.

    ``hinge_uuid`` may be wrapped in a list or tuple, in which case it is
    unwrapped with ``one``; ``against`` and ``how`` are forwarded as given.
    """
    hinge = one(hinge_uuid) if isinstance(hinge_uuid, (list, tuple)) else hinge_uuid
    merge_kwargs = {'against': against, 'hinge_uuid': hinge, 'how': how}
    merged_node = self.active.merge(**merge_kwargs)
    self.model.set_active(merged_node)
def open(self, filename=None, bookmark=None, index_col=None, header=None, sheet_name=0):
    """Open a node from ``filename``, activate it and optionally bookmark it.

    Arguments may arrive wrapped in a list/tuple and/or as strings:

    * ``filename``: anything that is not a string is unwrapped with ``one``.
    * ``index_col`` / ``header``: the strings 'none' (any case) and '' become
      None; any other string is converted with ``int``.
    * ``sheet_name``: the strings 'none' (any case) and '' become 0; any other
      string is kept (as ``str``).

    After the node is activated, ``self.bookmark(bookmark)`` is called when
    ``bookmark`` is not None.
    """
    text_types = (unicode, str)
    blank_words = ('none', '')

    def unwrap(value):
        # Pull the value out of a list/tuple wrapper, otherwise pass through.
        return one(value) if isinstance(value, (list, tuple)) else value

    def optional_int(value):
        # Strings become None (blank/'none') or an int; other values pass through.
        if not isinstance(value, text_types):
            return value
        text = str(value)
        return None if text.lower() in blank_words else int(text)

    if not isinstance(filename, text_types):
        filename = one(filename)

    index_col = optional_int(unwrap(index_col))

    sheet_name = unwrap(sheet_name)
    if isinstance(sheet_name, text_types):
        sheet_name = str(sheet_name)
        if sheet_name.lower() in blank_words:
            sheet_name = 0

    header = optional_int(unwrap(header))

    opened_node = self.controller.open_node_from_file(
        filename=filename,
        bookmark=bookmark,
        index_col=index_col,
        header=header,
        sheet_name=sheet_name)
    self.model.set_active(opened_node)

    if bookmark is not None:
        self.bookmark(bookmark)
def fold(self, by=None, reduce=None):
    """Collapse ``self.df`` so that each cell holds an ``InteriorSeries``.

    Without ``by`` (None or empty) the result is a one-row DataFrame with the
    same column keys as ``self.df``; each cell wraps the whole column in an
    ``InteriorSeries``.

    With ``by`` the frame is grouped by ``by``. For every group the values of
    each column are wrapped in an ``InteriorSeries`` (the ``by`` entries are
    dropped), the per-group results are assembled into a DataFrame keyed by
    group key, and that frame is transposed and its index reset before being
    returned.

    Args:
        by: Column name(s) to group by, or None/empty for no grouping.
        reduce: Accepted for interface compatibility; not used by this
            implementation.

    Returns:
        The folded ``pandas.DataFrame``.
    """
    if by is None or len(by) == 0:
        return pd.DataFrame({
            key: [InteriorSeries(col)]
            for key, col in self.df.iteritems()
        })

    data_dict = {}
    for key, group_df in self.df.groupby(by):
        data_dict[key] = group_df.T.apply(
            lambda x: InteriorSeries(pd.Series(list(x))), axis=1).drop(by)
    tmp = pd.DataFrame(data_dict)

    # A single-element list is unwrapped so the columns index is renamed with
    # a plain name rather than a one-element list.
    if isinstance(by, list) and len(by) == 1:
        by = one(by)
    tmp.columns = tmp.columns.rename(by)
    return tmp.T.reset_index()
def __str__(self):
    """Return a text summary built from ``describe(include='all')``.

    With exactly one node frame, the summary of that frame is returned with
    its columns labelled by the node name ('<anon>' when the name is None).
    Otherwise every frame is summarised, labelled ``name[key]`` using
    ``self.get_key``, and the summaries are laid out side by side, line by
    line, separated by ' | '.
    """
    name_prefix = '<anon>' if self.name is None else self.name

    if len(self) == 1:
        summary = one(self.node_frames).describe(include='all')
        summary.columns.name = '{name_prefix}'.format(name_prefix=name_prefix)
        return str(summary)

    keyed_summaries = []
    for frame in self.node_frames:
        keyed_summaries.append((self.get_key(frame), frame.describe(include='all')))

    for frame_key, summary in keyed_summaries:
        summary.columns.name = '{name_prefix}[{ii}]'.format(
            name_prefix=name_prefix, ii=frame_key)

    # One list of text lines per frame; zip pairs up the n-th line of each.
    line_columns = [str(summary).split('\n') for _, summary in keyed_summaries]
    joined_rows = [' | '.join(row) for row in zip(*line_columns)]
    return '\n'.join(joined_rows)
def bookmark(self, name=None):
    """Rename the model's active node to ``name``.

    A ``name`` that is not a string is unwrapped with ``one`` first; the
    value is converted with ``str`` before being passed to ``rename``.
    """
    if isinstance(name, (unicode, str)):
        label = name
    else:
        label = one(name)
    self.model.active.rename(str(label))
def apply(self, **kwargs):
    """Return ``self.df`` joined to a new column computed from other columns.

    Keyword arguments read by this method:
        lazy: When true (the default), each cell of the new column is an
            HTML snippet (an empty ``<div>`` plus a ``<script>``) that POSTs
            the marshalled cell arguments to the lazy-formatting endpoint
            once the element is visible and empty.
        dillify: Only used when ``lazy`` is false. When true, ``mapper`` is
            deserialised with ``dill``; otherwise it is looked up in
            ``mapper_library_dict``.
        mapper: Mapper identifier (or dill payload, see ``dillify``).
        columns: Source column name, or list/tuple of names. More than one
            name applies the function row-wise over those columns.
        new_column: Name of the column holding the results.
        drop: When equal to True, the source columns are dropped from the
            result. Defaults to False when omitted.

    Returns:
        A new ``pandas.DataFrame`` with ``new_column`` followed by the
        columns of ``self.df`` (minus ``columns`` when ``drop`` is set).
    """
    if kwargs.get('lazy', True):
        def apply_fcn(args):
            # Marshalling to prepare for payload dumps:
            if isinstance(args, (pd.Series, )):
                # Multi column; arranged as a series:
                row = {}
                for key, val in args.iteritems():
                    try:
                        row[key] = val.to_dict()
                    except AttributeError:
                        row[key] = val
                args = [row]
            elif isinstance(args, (InteriorSeries, )):
                args = [args.to_dict()]
            else:
                # usually a single column with simple data i.e. filename;
                # need to wrap to unpack with *args in tgt function
                args = [str(args)]  # str is a unicode guard

            payload = {
                'mapper': str(kwargs['mapper']),
                'args': args,
                'kwargs': {}
            }
            # Cell data ends up inside a <script> element; neutralise any
            # "</" so a value containing "</script>" cannot close it early.
            # "<\/" is an equivalent escape in both JSON and JavaScript.
            payload_json = json.dumps(payload).replace('</', '<\\/')

            element_id = generate_uuid()
            div_txt = '<div id="{id}"></div>'.format(id=element_id)
            # Doubled braces are literal braces for str.format. The
            # '{{session_uuid}}' value re-inserts a literal placeholder,
            # presumably filled in by a later templating step -- TODO confirm.
            js_template = (
                '$(".dataframe").on("draw.dt", function() {{ '
                'if ($("#{id}").is(":visible") && $("#{id}").is(":empty") ){{ '
                '$.ajax({{type : "POST", '
                'url : "http://nicholasc-ubuntu:5050/lazy_formatting/{session_uuid}", '
                'data: JSON.stringify({payload}, null, "\t"), '
                'contentType: "application/json;charset=UTF-8", '
                'success: function(result) {{ '
                '$("#{id}").html(JSON.parse(result)["result"]); '
                'console.log("HW"); '
                '}} '
                '}}); '
                '}}; '
                '}});'
            )
            js = js_template.format(
                id=element_id,
                payload=payload_json,
                session_uuid='{{session_uuid}}')
            js_txt = """<script>{js}</script>""".format(js=js)
            return ''.join([div_txt, js_txt])
    elif kwargs.get('dillify', False):
        # Will still need for non-server mode (aka lazy=false)
        # SECURITY: dill.loads executes arbitrary code embedded in the
        # payload; only use this path with mappers from a trusted source.
        apply_fcn = dill.loads(kwargs['mapper'].encode('latin1'))
    else:
        apply_fcn = mapper_library_dict[kwargs['mapper']]

    columns = kwargs['columns']
    is_sequence = isinstance(columns, (list, tuple))
    if is_sequence and len(columns) > 1:
        # Several columns: the function receives each row as a Series.
        result_series = self.df[columns].apply(apply_fcn, axis=1)
    elif is_sequence and len(columns) == 1:
        result_series = self.df[one(columns)].apply(apply_fcn)
    else:
        result_series = self.df[columns].apply(apply_fcn)

    df = pd.DataFrame({kwargs['new_column']: result_series})
    df = df.join(self.df)
    # Equality (not truthiness) is kept on purpose so that non-bool values
    # such as the string 'false' do not trigger a drop.
    if kwargs.get('drop', False) == True:  # noqa: E712
        df = df.drop(columns, axis=1)
    return df
def get_node_by_name(self, bookmark):
    """Return the node whose ``name`` equals ``bookmark``, or None.

    Candidates come from ``self.get_filtered_node_list``; when at least one
    node matches, the result is obtained by passing the matches to ``one``.
    """
    matches = self.get_filtered_node_list(lambda node: node.name == bookmark)
    if len(matches) > 0:
        return one(matches)
    return None