Exemplo n.º 1
0
    def apply(self,
              columns=None,
              mapper=None,
              new_column=None,
              lazy=True,
              drop=False,
              dillify=False):
        """Apply ``mapper`` on the active node and activate the resulting node.

        Normalizes the arguments, delegates to ``self.active.apply`` and
        makes the node it returns the model's active node.

        Args:
            columns: Passed through unchanged.
            mapper: A list/tuple is reduced with ``one``; otherwise passed
                through unchanged.
            new_column: A list/tuple is reduced with ``one``; otherwise
                passed through unchanged.
            lazy: Passed through unchanged.
            drop: A bool, or the string ``'true'``/``'false'`` in any letter
                case. A list/tuple is reduced with ``one`` first.
            dillify: Passed through unchanged.

        Raises:
            RuntimeError: If ``drop`` is a string other than
                ``'true'``/``'false'``.
            AssertionError: If ``drop`` is neither a string nor a bool.
        """
        # NOTE(review): ``one`` is defined outside this block; presumably it
        # returns the sole element of a single-item sequence -- confirm.
        if isinstance(new_column, (list, tuple)):
            new_column = one(new_column)

        if isinstance(mapper, (list, tuple)):
            mapper = one(mapper)

        if isinstance(drop, (list, tuple)):
            drop = one(drop)

        if isinstance(drop, (str, unicode)):
            drop_text = drop.lower()
            if drop_text == 'true':
                drop = True
            elif drop_text == 'false':
                drop = False
            else:
                raise RuntimeError('drop string not recognized as bool')
        elif not isinstance(drop, bool):
            # Raised explicitly instead of via ``assert`` so the check still
            # runs under ``python -O``; the exception type is kept so existing
            # callers see the same error.
            raise AssertionError('drop must be a bool or a true/false string')

        new_node = self.active.apply(columns=columns,
                                     mapper=mapper,
                                     new_column=new_column,
                                     lazy=lazy,
                                     drop=drop,
                                     dillify=dillify)
        self.model.set_active(new_node)
Exemplo n.º 2
0
    def read(self, query=None, filename=None, uri=None, password=None):
        """Run one or more queries against ``uri`` and activate the new node.

        Query text is gathered from ``query`` and from the contents of the
        files named by ``filename``. Each query is executed with
        ``read_file_query_uri`` and wrapped in a ``NodeFrame``; the frames
        become a node created under ``self.model.root``, which is then set as
        the active node.

        Args:
            query: A query string, a list/tuple of query strings, or None.
            filename: A path, a list/tuple of paths, or None. Each file's
                contents are used as one query.
            uri: Connection URI; a list/tuple is reduced with ``one``.
            password: Optional password spliced into ``uri``; a list/tuple is
                reduced with ``one``.

        Raises:
            RuntimeError: If ``query`` or ``filename`` has an unsupported type.
        """
        # NOTE(review): ``one`` is defined outside this block; presumably it
        # returns the sole element of a single-item sequence -- confirm.
        all_query_list = []
        if isinstance(query, (list, tuple)):
            all_query_list.extend(query)
        elif isinstance(query, (str, unicode)):
            all_query_list.append(str(query))
        elif query is not None:
            raise RuntimeError(
                'query must be a string, a list/tuple of strings, or None')

        # ``with`` closes each query file promptly instead of leaking the
        # handle until garbage collection.
        if isinstance(filename, (list, tuple)):
            for fn in filename:
                with open(fn, 'r') as query_file:
                    all_query_list.append(query_file.read())
        elif isinstance(filename, (str, unicode)):
            with open(filename, 'r') as query_file:
                all_query_list.append(str(query_file.read()))
        elif filename is not None:
            raise RuntimeError(
                'filename must be a path, a list/tuple of paths, or None')

        if isinstance(uri, (list, tuple)):
            uri = one(uri)

        if password is not None:
            if isinstance(password, (list, tuple)):
                password = one(password)
            url_obj = urlparse.urlparse(uri)

            # ``username`` and ``port`` are None when the URI omits them;
            # formatting None directly would put a literal "None" in the URI.
            username = url_obj.username or ''
            if url_obj.port is None:
                host_port = url_obj.hostname
            else:
                host_port = '%s:%s' % (url_obj.hostname, url_obj.port)

            uri_pwd = '%s://%s:%s@%s%s' % (url_obj.scheme, username, password,
                                           host_port, url_obj.path)
        else:
            uri_pwd = uri

        node_frame_list = []
        for q in all_query_list:
            df, load_time = read_file_query_uri(query=q, uri=uri_pwd)

            # NOTE(review): the metadata records the original ``query``
            # argument, not the individual query ``q`` that produced this
            # frame (it is None for file-sourced queries) -- confirm intent.
            node_frame = NodeFrame(df=df,
                                   load_time=load_time,
                                   metadata={
                                       'query': query,
                                       'uri': uri_pwd
                                   })
            node_frame_list.append(node_frame)

        node = self.controller.create_node(tuple(node_frame_list),
                                           parent=self.model.root)
        self.model.set_active(node)
Exemplo n.º 3
0
    def transpose(self, index=None):
        """Transpose the active node and make the result the active node.

        Args:
            index: Forwarded to ``self.active.transpose``. A list/tuple is
                first reduced with ``one`` (defined outside this block).
        """
        single_index = one(index) if isinstance(index, (list, tuple)) else index
        self.model.set_active(self.active.transpose(index=single_index))
Exemplo n.º 4
0
    def query(self, query=None):
        """Run ``query`` on the active node and make the result the active node.

        Args:
            query: Forwarded to ``self.active.query``. A list/tuple is first
                reduced with ``one`` (defined outside this block).
        """
        if isinstance(query, (list, tuple)):
            expression = one(query)
        else:
            expression = query

        self.model.set_active(self.active.query(query=expression))
Exemplo n.º 5
0
    def merge(self, against=None, hinge_uuid=None, how='inner'):
        """Merge the active node with ``against`` and activate the result.

        Args:
            against: Forwarded unchanged to ``self.active.merge``.
            hinge_uuid: Forwarded to ``self.active.merge``. A list/tuple is
                first reduced with ``one`` (defined outside this block).
            how: Forwarded unchanged to ``self.active.merge``.
        """
        hinge = hinge_uuid
        if isinstance(hinge, (list, tuple)):
            hinge = one(hinge)

        merged_node = self.active.merge(against=against,
                                        hinge_uuid=hinge,
                                        how=how)
        self.model.set_active(merged_node)
Exemplo n.º 6
0
    def open(self,
             filename=None,
             bookmark=None,
             index_col=None,
             header=None,
             sheet_name=0):
        """Open a node from a file, activate it and optionally bookmark it.

        Args:
            filename: A string, or anything else, which is reduced with
                ``one`` (defined outside this block).
            bookmark: Forwarded to the controller; when not None the method
                also calls ``self.bookmark(bookmark)`` afterwards.
            index_col: When given as a list/tuple it is reduced with ``one``;
                a string result becomes None for ``'none'``/``''`` (any
                letter case) and an int otherwise. Other values pass through.
            header: Handled exactly like ``index_col``.
            sheet_name: When given as a list/tuple it is reduced with ``one``;
                a string result of ``'none'``/``''`` becomes 0, any other
                string is kept. Other values pass through.
        """
        text_types = (unicode, str)
        seq_types = (list, tuple)

        def _optional_int(value):
            # Only list/tuple inputs are unwrapped and parsed; everything
            # else is returned untouched.
            if not isinstance(value, seq_types):
                return value
            value = one(value)
            if not isinstance(value, text_types):
                return value
            text = str(value)
            return None if text.lower() in ('none', '') else int(text)

        if not isinstance(filename, text_types):
            filename = one(filename)

        index_col = _optional_int(index_col)

        if isinstance(sheet_name, seq_types):
            sheet_name = one(sheet_name)
            if isinstance(sheet_name, text_types):
                sheet_name = str(sheet_name)
                # An empty or 'none' sheet name selects sheet 0.
                sheet_name = (0 if sheet_name.lower() in ('none', '')
                              else sheet_name)

        header = _optional_int(header)

        opened_node = self.controller.open_node_from_file(
            filename=filename,
            bookmark=bookmark,
            index_col=index_col,
            header=header,
            sheet_name=sheet_name)
        self.model.set_active(opened_node)

        if bookmark is not None:
            self.bookmark(bookmark)
Exemplo n.º 7
0
    def fold(self, by=None, reduce=None):
        """Fold the rows of ``self.df`` into ``InteriorSeries`` cells.

        Args:
            by: Column label(s) to group on. When None or empty, the whole
                frame is folded into a single row.
            reduce: Accepted but not used by this method.

        Returns:
            pandas.DataFrame: Without ``by``, a one-row frame whose cell for
            each column is ``InteriorSeries(column)``. With ``by``, one row
            per group: the group key(s) come back as ordinary columns (via
            ``reset_index``) and every other column holds an
            ``InteriorSeries`` built from that group's values.
        """
        if by is None or len(by) == 0:
            return pd.DataFrame({
                key: [InteriorSeries(col)]
                for key, col in self.df.iteritems()
            })

        # For each group, transpose so that every original column becomes a
        # row, wrap each row's values in an InteriorSeries, and discard the
        # grouping column(s), which are restored from the group key below.
        data_dict = {}
        for key, group_df in self.df.groupby(by):
            data_dict[key] = group_df.T.apply(
                lambda x: InteriorSeries(pd.Series(list(x))),
                axis=1).drop(by)

        tmp = pd.DataFrame(data_dict)

        # A single-element list is unwrapped before being used as the name
        # of the columns index.
        if isinstance(by, list) and len(by) == 1:
            by = one(by)

        tmp.columns = tmp.columns.rename(by)

        return tmp.T.reset_index()
Exemplo n.º 8
0
    def __str__(self):
        """Return ``describe(include='all')`` summaries of the node's frames.

        A node with exactly one frame yields that frame's summary, labelled
        with the node name through the columns name. Otherwise the per-frame
        summaries are laid out side by side, separated by ``'  |  '``, each
        labelled ``<name>[<key>]``. Unnamed nodes are labelled ``<anon>``.
        """
        name_prefix = '<anon>' if self.name is None else self.name

        if len(self) == 1:
            describe_df = one(self.node_frames).describe(include='all')
            describe_df.columns.name = '{name_prefix}'.format(
                name_prefix=name_prefix)
            return str(describe_df)

        describe_df_list = [(self.get_key(x), x.describe(include='all'))
                            for x in self.node_frames]
        for curr_key, df in describe_df_list:
            df.columns.name = '{name_prefix}[{ii}]'.format(
                name_prefix=name_prefix, ii=curr_key)

        blocks = [str(df).split('\n') for _, df in describe_df_list]
        if not blocks:
            return ''

        # zip() stops at the shortest block, which would silently drop the
        # trailing lines of taller summaries. Pad shorter blocks with blank
        # lines of their own width so every line survives and the columns
        # stay aligned.
        height = max([len(lines) for lines in blocks])
        for lines in blocks:
            missing = height - len(lines)
            if missing > 0:
                filler = ' ' * max([len(line) for line in lines])
                lines.extend([filler] * missing)

        return '\n'.join(['  |  '.join(row) for row in zip(*blocks)])
Exemplo n.º 9
0
 def bookmark(self, name=None):
     """Rename the model's active node to ``name``.

     A ``name`` that is not a string is first reduced with ``one`` (defined
     outside this block); the result is converted with ``str`` before the
     rename.
     """
     label = name if isinstance(name, (unicode, str)) else one(name)
     self.model.active.rename(str(label))
Exemplo n.º 10
0
    def apply(self, **kwargs):
        """Return ``self.df`` extended with a column computed from ``columns``.

        Keyword Args:
            columns: Column label, or list/tuple of labels, fed to the mapper.
                With more than one label the mapper is applied row-wise
                (``axis=1``); otherwise it is applied to the single column.
            mapper: In lazy mode, embedded (via ``str``) in the request
                payload. Otherwise a key into ``mapper_library_dict`` or,
                when ``dillify`` is true, a dill-serialized callable encoded
                as a latin1 string.
            new_column: Label of the result column.
            lazy: Defaults to True. When true, every cell becomes an HTML
                snippet (an empty ``<div>`` plus a ``<script>``) that POSTs
                the payload and fills the div with the response.
            dillify: Defaults to False; only consulted when ``lazy`` is false.
            drop: When equal to True, ``columns`` are dropped from the result.

        Returns:
            pandas.DataFrame: The new column joined with ``self.df``.

        Raises:
            KeyError: If ``columns``, ``new_column``, ``drop`` or (when
                needed) ``mapper`` is missing.
        """
        if kwargs.get('lazy', True):

            def apply_fcn(args):

                # Marshalling to prepare for payload dumps:
                if isinstance(args, (pd.Series, )):

                    # Multi column; arranged as a series:
                    new_args = [{}]
                    for key, val in args.iteritems():
                        try:
                            new_args[0][key] = val.to_dict()
                        except AttributeError:
                            new_args[0][key] = val

                    args = new_args

                elif isinstance(args, (InteriorSeries, )):
                    args = [args.to_dict()]

                else:

                    # Usually a single column with simple data, e.g. a
                    # filename; wrapped in a list so the target function can
                    # unpack it with *args.
                    args = [str(args)]  # str is a unicode guard

                payload = {
                    'mapper': str(kwargs['mapper']),
                    'args': args,
                    'kwargs': {}
                }

                # The payload ends up inside an inline <script>. Escaping
                # every "<" as \u003c keeps cell data such as "</script>"
                # from terminating the script element; the escape is only
                # ever inside JSON strings, so the parsed value is unchanged.
                payload_js = json.dumps(payload).replace('<', '\\u003c')

                div_id = generate_uuid()
                div_txt = '<div id="{id}"></div>'.format(id=div_id)
                # NOTE(review): the endpoint host is hard-coded, and the
                # literal "{{session_uuid}}" is left in the output --
                # presumably filled in by a later templating step; confirm.
                js = '$(".dataframe").on("draw.dt", function() {{\
                                                                if ($("#{id}").is(":visible") && $("#{id}").is(":empty")  ){{\
                                                                                                $.ajax({{type : "POST",\
                                                                                                        url : "http://nicholasc-ubuntu:5050/lazy_formatting/{session_uuid}",\
                                                                                                        data: JSON.stringify({payload}, null, "\t"),\
                                                                                                        contentType: "application/json;charset=UTF-8",\
                                                                                                        success: function(result) {{\
                                                                                                                                $("#{id}").html(JSON.parse(result)["result"]);\
                                                                                                                                    console.log("HW");\
                                                                                                                                    }}\
                                                                                                        }});\
                                                                                                }};\
                                                                }});'.format(
                    id=div_id,
                    payload=payload_js,
                    session_uuid='{{session_uuid}}')

                js_txt = """<script>{js}</script>""".format(js=js)

                return ''.join([div_txt, js_txt])

        else:

            # Will still need for non-server mode (aka lazy=false)
            if kwargs.get('dillify', False):
                # SECURITY NOTE(review): dill.loads executes arbitrary code
                # from its input; ``mapper`` must come from a trusted source.
                apply_fcn = dill.loads(kwargs['mapper'].encode('latin1'))
            else:
                apply_fcn = mapper_library_dict[kwargs['mapper']]

        columns = kwargs['columns']
        is_sequence = isinstance(columns, (list, tuple))
        if is_sequence and len(columns) > 1:
            # Several columns: the mapper receives each row as a Series.
            result_series = self.df[columns].apply(apply_fcn, axis=1)
        elif is_sequence and len(columns) == 1:
            result_series = self.df[one(columns)].apply(apply_fcn)
        else:
            result_series = self.df[columns].apply(apply_fcn)

        df = pd.DataFrame({kwargs['new_column']: result_series})

        df = df.join(self.df)
        # Compared with == (not truthiness) so that only True-equal values
        # trigger the drop, exactly as before.
        if kwargs['drop'] == True:
            df = df.drop(columns, axis=1)

        return df
Exemplo n.º 11
0
 def get_node_by_name(self, bookmark):
     """Return the node whose ``name`` equals ``bookmark``, or None.

     Candidates come from ``self.get_filtered_node_list``. When the list is
     not empty it is reduced with ``one`` (defined outside this block).
     """
     matches = self.get_filtered_node_list(
         lambda node: node.name == bookmark)
     return one(matches) if len(matches) != 0 else None