Exemplo n.º 1
0
Arquivo: pot.py Projeto: yz-/ut
 def project_to(self, var_list=[]):
     """
     Project the potential onto a subset of its variables (the other variables are marginalized out).

     :param var_list: variable name or list of variable names to project to; only the ones that are
         also in self.vars() are used.
     :return: a new Pot. If at least one requested variable is retained, its table holds the sum of
         'pval' for every combination of the retained variables. Otherwise it is a singleton potential
         whose only 'pval' is the sum of all the pvals of self.tb.
     """
     kept_vars = colloc.intersect(ascertain_list(var_list), self.vars())
     if not kept_vars:
         # nothing to group by: collapse the whole table into a single total
         total = self.tb['pval'].sum()
         return Pot(pd.DataFrame({'pval': total}, index=['']))
     # keep only the retained variables and the values, then sum the values within each group
     sub_table = self.tb[kept_vars + ['pval']]
     marginal = sub_table.groupby(kept_vars).sum().reset_index()
     return Pot(marginal)
Exemplo n.º 2
0
 def project_to(self, var_list=[]):
     """
     Project the potential onto a subset of its variables (the other variables are marginalized out).

     :param var_list: variable name or list of variable names to project to; only the ones that are
         also in self.vars() are used.
     :return: a new Pot. If at least one requested variable is retained, its table holds the sum of
         'pval' for every combination of the retained variables. Otherwise it is a singleton potential
         whose only 'pval' is the sum of all the pvals of self.tb.
     """
     kept_vars = colloc.intersect(ascertain_list(var_list), self.vars())
     if not kept_vars:
         # nothing to group by: collapse the whole table into a single total
         total = self.tb['pval'].sum()
         return Pot(pd.DataFrame({'pval': total}, index=['']))
     # keep only the retained variables and the values, then sum the values within each group
     sub_table = self.tb[kept_vars + ['pval']]
     marginal = sub_table.groupby(kept_vars).sum().reset_index()
     return Pot(marginal)
Exemplo n.º 3
0
Arquivo: pot.py Projeto: yz-/ut
 def _merge_(self, pot):
     """
     Internal helper (not meant to be called directly by users): join the tables of two pots.

     When the two pots share variables, their tables are inner-merged on those shared variables (rows
     sorted by the join keys); columns present in both tables that are not join keys get the '_x'
     (left, self) and '_y' (right, pot) suffixes.
     When they share no variable, the cartesian product of the two tables is returned instead.

     :param pot: the other Pot.
     :return: the joined table (not wrapped in a Pot).
     """
     shared_vars = colloc.intersect(self.vars(), pot.vars())  # join keys: common variables only (not pval)
     if not shared_vars:
         # no common variables: every row of self.tb is paired with every row of pot.tb
         return cartesian_product(self.tb, pot.tb)
     return pd.merge(self.tb, pot.tb, how='inner', on=shared_vars, sort=True, suffixes=('_x', '_y'))
Exemplo n.º 4
0
def get_info_df(store, keys=None, info=None, cols=None):
    """
    Build a summary dataframe with one row per store key, from the dict returned by get_info_dict(store).

    :param store: the store to describe; it is indexed by key (``store[key]``) and must offer ``keys()``.
    :param keys: key or list of keys to keep (intersected with the store's keys).
        If falsy, all of the store's keys are used.
    :param info: optional. Either a dict mapping a new column name to a function (applied to the data
        stored under each key to compute that column's value), or an iterable of column names to keep.
    :param cols: optional columns to keep in the returned dataframe (applied last).
    :return: a dataframe indexed (and sorted) by key, where NaNs have been replaced by ''.
    :raises ValueError: if ``info`` is neither a dict nor an iterable of strings.
    """
    # process inputs
    if not keys:
        keys = store.keys()
    else:
        keys = util_ulist.ascertain_list(keys)
        keys = colloc.intersect(keys, store.keys())
    # get info_dict
    info_dict = get_info_dict(store)
    # make the df: one row per entry of info_dict, with the entry's key stored in a 'key' column
    # (.items() works on both Python 2 and 3, unlike .iteritems())
    df = pd.DataFrame([dict(v, key=k) for k, v in info_dict.items()])
    # explicit copy so that the in-place writes below target df itself and not a view of the unfiltered frame
    df = df[df['key'].isin(keys)].copy()
    if 'shape' in df.columns:
        del df['shape']
    if 'ncols' not in df.columns:
        df['ncols'] = np.nan
    if 'nrows' not in df.columns:
        df['nrows'] = np.nan
    # get ncols and nrows where missing (ncols and nrows should both be missing when one is)
    # np.flatnonzero gives the row positions; Series.nonzero() no longer exists in recent pandas
    missing_positions = np.flatnonzero(df['ncols'].isnull().values)
    nrows_pos = df.columns.get_loc('nrows')
    ncols_pos = df.columns.get_loc('ncols')
    for i in missing_positions:
        d = store[df['key'].iloc[i]]
        # single-step positional assignment: chained df[col].iloc[i] = ... may write to a temporary copy
        df.iloc[i, nrows_pos] = len(d)
        df.iloc[i, ncols_pos] = len(d.columns)
    # clean up and return
    df = df.set_index('key')
    df = df.sort_index()
    df = daf_manip.reorder_columns_as(
        df, ['nrows', 'ncols', 'isa', 'typ', 'indexers', 'dc'])
    df = df.replace(to_replace=np.nan, value='')
    if info:
        if isinstance(info, dict):
            # add as many columns as there are keys in dict, using the values of the dict as functions applied to
            # the whole stored dataframe to get the column value
            df = pd.concat(
                [df, pd.DataFrame(columns=list(info), index=df.index)],
                axis=1)
            for key in df.index.values:
                key_data = store[key]
                for k, v in info.items():
                    # write the cell directly on df (df[k].loc[key] = ... is a chained assignment)
                    df.at[key, k] = v(key_data)
        # built-in all() over a generator: np.all(map(...)) is always truthy when map() returns a lazy iterator
        # NOTE(review): basestring only exists on Python 2; kept because that is what the file targets
        elif all(isinstance(x, basestring) for x in info):
            df = daf_manip.filter_columns(df, info)
        else:
            raise ValueError('Unrecognized info format')
    # filter cols
    if cols:
        df = daf_manip.filter_columns(df, cols)
    return df
Exemplo n.º 5
0
Arquivo: pstore.py Projeto: yz-/ut
def get_info_df(store, keys=None, info=None, cols=None):
    """
    Build a summary dataframe with one row per store key, from the dict returned by get_info_dict(store).

    :param store: the store to describe; it is indexed by key (``store[key]``) and must offer ``keys()``.
    :param keys: key or list of keys to keep (intersected with the store's keys).
        If falsy, all of the store's keys are used.
    :param info: optional. Either a dict mapping a new column name to a function (applied to the data
        stored under each key to compute that column's value), or an iterable of column names to keep.
    :param cols: optional columns to keep in the returned dataframe (applied last).
    :return: a dataframe indexed (and sorted) by key, where NaNs have been replaced by ''.
    :raises ValueError: if ``info`` is neither a dict nor an iterable of strings.
    """
    # process inputs
    if not keys:
        keys = store.keys()
    else:
        keys = util_ulist.ascertain_list(keys)
        keys = colloc.intersect(keys, store.keys())
    # get info_dict
    info_dict = get_info_dict(store)
    # make the df: one row per entry of info_dict, with the entry's key stored in a 'key' column
    # (.items() works on both Python 2 and 3, unlike .iteritems())
    df = pd.DataFrame([dict(v, key=k) for k, v in info_dict.items()])
    # explicit copy so that the in-place writes below target df itself and not a view of the unfiltered frame
    df = df[df['key'].isin(keys)].copy()
    if 'shape' in df.columns:
        del df['shape']
    if 'ncols' not in df.columns:
        df['ncols'] = np.nan
    if 'nrows' not in df.columns:
        df['nrows'] = np.nan
    # get ncols and nrows where missing (ncols and nrows should both be missing when one is)
    # np.flatnonzero gives the row positions; Series.nonzero() no longer exists in recent pandas
    missing_positions = np.flatnonzero(df['ncols'].isnull().values)
    nrows_pos = df.columns.get_loc('nrows')
    ncols_pos = df.columns.get_loc('ncols')
    for i in missing_positions:
        d = store[df['key'].iloc[i]]
        # single-step positional assignment: chained df[col].iloc[i] = ... may write to a temporary copy
        df.iloc[i, nrows_pos] = len(d)
        df.iloc[i, ncols_pos] = len(d.columns)
    # clean up and return
    df = df.set_index('key')
    df = df.sort_index()
    df = daf_manip.reorder_columns_as(df, ['nrows', 'ncols', 'isa', 'typ', 'indexers', 'dc'])
    df = df.replace(to_replace=np.nan, value='')
    if info:
        if isinstance(info, dict):
            # add as many columns as there are keys in dict, using the values of the dict as functions applied to
            # the whole stored dataframe to get the column value
            df = pd.concat([df, pd.DataFrame(columns=list(info), index=df.index)], axis=1)
            for key in df.index.values:
                key_data = store[key]
                for k, v in info.items():
                    # write the cell directly on df (df[k].loc[key] = ... is a chained assignment)
                    df.at[key, k] = v(key_data)
        # built-in all() over a generator: np.all(map(...)) is always truthy when map() returns a lazy iterator
        # NOTE(review): basestring only exists on Python 2; kept because that is what the file targets
        elif all(isinstance(x, basestring) for x in info):
            df = daf_manip.filter_columns(df, info)
        else:
            raise ValueError('Unrecognized info format')
    # filter cols
    if cols:
        df = daf_manip.filter_columns(df, cols)
    return df
Exemplo n.º 6
0
 def __or__(self, item):
     """
     Discouraged conditioning operator: prints a notice recommending ``/`` and then dispatches on item's type.

     If item is a:
         string or list: self is divided by its projection onto that variable (or those variables).
         dict: self is divided by its projection onto the dict keys that are also variables of self, and
             the result is passed to get_slice with the dict.
     --> This resembles P(A|B=1) kind of thing...

     NOTE(review): an earlier version of this docstring also listed empty/None/False items, but no branch
     handles None or False; they end up in the unsupported-type branch.

     :raises TypeError: if item is not a string, a list or a dict.
     """
     # single-argument print(...) emits the same text under Python 2's print statement and Python 3's function
     print("I'm trying to discourage using | now (might want to use it for fuzzy logic at some point")
     print("--> Use / instead of |. ")
     if isinstance(item, basestring):
         return self / self.project_to([item])
     elif isinstance(item, list):
         return self / self.project_to(item)
     elif isinstance(item, dict):
         intercept_dict = item
         var_list = colloc.intersect(self.vars(), intercept_dict.keys())
         return (self / self.project_to(var_list)).get_slice(intercept_dict)
     else:
         # the exception used to be built but never raised, so unsupported items silently returned None
         raise TypeError('Unknown item type')
Exemplo n.º 7
0
Arquivo: pot.py Projeto: yz-/ut
 def __or__(self, item):
     """
     Discouraged conditioning operator: prints a notice recommending ``/`` and then dispatches on item's type.

     If item is a:
         string or list: self is divided by its projection onto that variable (or those variables).
         dict: self is divided by its projection onto the dict keys that are also variables of self, and
             the result is passed to get_slice with the dict.
     --> This resembles P(A|B=1) kind of thing...

     NOTE(review): an earlier version of this docstring also listed empty/None/False items, but no branch
     handles None or False; they end up in the unsupported-type branch.

     :raises TypeError: if item is not a string, a list or a dict.
     """
     # single-argument print(...) emits the same text under Python 2's print statement and Python 3's function
     print("I'm trying to discourage using | now (might want to use it for fuzzy logic at some point")
     print("--> Use / instead of |. ")
     if isinstance(item, basestring):
         return self / self.project_to([item])
     elif isinstance(item, list):
         return self / self.project_to(item)
     elif isinstance(item, dict):
         intercept_dict = item
         var_list = colloc.intersect(self.vars(), intercept_dict.keys())
         return (self / self.project_to(var_list)).get_slice(intercept_dict)
     else:
         # the exception used to be built but never raised, so unsupported items silently returned None
         raise TypeError('Unknown item type')
Exemplo n.º 8
0
def get_col_names(store,
                  keys=None,
                  singular_info='index_and_columns',
                  print_results=False,
                  style='dict'):
    '''
    Collect the index names and column names of the data stored under each key of a store.

    :param store: a HDFStore (anything offering ``keys()`` and ``store[key]`` returning a dataframe works)
    :param keys: list of keys to get info from (if present); if falsy, all keys of the store are used
    :param singular_info: one of 'index', 'columns' or 'index_and_columns': the single kind of names to
        keep for each key. If falsy, all three are kept, in a dict per key.
    :param print_results: if True, pretty-print the result (as it is before any dataframe conversion)
    :param style: 'dict' (default) to get a dict, 'dataframe' to get a dataframe with one row per key
    :return: a cols_info dict whose keys are the keys of the store and values are either the list of names
        selected by singular_info, or (when singular_info is falsy) a dict with
        'index', 'columns', and 'index_and_columns' which contain the data col names.
        With style='dataframe', a dataframe with one row per key holding the non-empty names
        (shorter rows are padded with '').
    '''
    # process inputs
    if not keys:
        keys = store.keys()
    else:
        keys = util_ulist.ascertain_list(keys)
        keys = colloc.intersect(keys, store.keys())
    # make a dict with col (and index) info
    cols_info = dict()
    for key in keys:
        df = store[key]
        index_names = list(df.index.names)
        column_names = list(df.columns)
        cols_info[key] = {
            'index': index_names,
            'columns': column_names,
            'index_and_columns': index_names + column_names,
        }
    if singular_info:
        # keep only the requested kind of names for each key
        all_info = cols_info
        cols_info = dict()
        for key in keys:
            cols_info[key] = all_info[key][singular_info]
    if print_results:
        PrettyPrinter(indent=2).pprint(cols_info)
    if style == 'dataframe':
        # one single-column frame per key, concatenated once (instead of re-concatenating in the loop);
        # the empty seed frame keeps the result an empty dataframe when there are no keys
        frames = [pd.DataFrame()]
        # .items() works on both Python 2 and 3, unlike .iteritems()
        for k, v in cols_info.items():
            v = [x for x in v if x]  # drop empty names (e.g. the None name of an unnamed index)
            frames.append(pd.DataFrame(data=v, columns=[k]))
        d = pd.concat(frames, axis=1)
        d = d.fillna(value='')
        cols_info = d.transpose()

    return cols_info
Exemplo n.º 9
0
 def _merge_(self, pot):
     """
     Internal helper (not meant to be called directly by users): join the tables of two pots.

     When the two pots share variables, their tables are inner-merged on those shared variables (rows
     sorted by the join keys); columns present in both tables that are not join keys get the '_x'
     (left, self) and '_y' (right, pot) suffixes.
     When they share no variable, the cartesian product of the two tables is returned instead.

     :param pot: the other Pot.
     :return: the joined table (not wrapped in a Pot).
     """
     shared_vars = colloc.intersect(self.vars(), pot.vars())  # join keys: common variables only (not pval)
     if not shared_vars:
         # no common variables: every row of self.tb is paired with every row of pot.tb
         return cartesian_product(self.tb, pot.tb)
     merge_options = dict(how='inner', on=shared_vars, sort=True, suffixes=('_x', '_y'))
     return pd.merge(self.tb, pot.tb, **merge_options)
Exemplo n.º 10
0
 def __div__(self, item):
     """
     Operation depends on what item's type is. If item is a:
         Pot: perform potential division: the two pots are merged with _merge_ and the merged table is
             passed to _val_div_.
         string or list: normalize according to that variable (or those variables).
         dict: normalize according to the dict keys that are also variables of self, then pass the dict
             to get_slice on the result.
     --> This resembles P(A|B=1) kind of thing...

     NOTE(review): an earlier version of this docstring also listed empty/None/False items, but no branch
     handles None or False; they end up in the unsupported-type branch.

     :raises TypeError: if item is not a Pot, a string, a list or a dict.
     """
     if isinstance(item, Pot):
         return Pot(_val_div_(self._merge_(item)))
     elif isinstance(item, basestring):
         return self.normalize([item])
     elif isinstance(item, list):
         return self.normalize(item)
     elif isinstance(item, dict):
         intercept_dict = item
         var_list = colloc.intersect(self.vars(), intercept_dict.keys())
         return self.normalize(var_list).get_slice(intercept_dict)
     else:
         # the exception used to be built but never raised, so unsupported items silently returned None
         raise TypeError('Unknown item type')
Exemplo n.º 11
0
Arquivo: pot.py Projeto: yz-/ut
 def __div__(self, item):
     """
     Operation depends on what item's type is. If item is a:
         Pot: perform potential division: the two pots are merged with _merge_ and the merged table is
             passed to _val_div_.
         string or list: normalize according to that variable (or those variables).
         dict: normalize according to the dict keys that are also variables of self, then pass the dict
             to get_slice on the result.
     --> This resembles P(A|B=1) kind of thing...

     NOTE(review): an earlier version of this docstring also listed empty/None/False items, but no branch
     handles None or False; they end up in the unsupported-type branch.

     :raises TypeError: if item is not a Pot, a string, a list or a dict.
     """
     if isinstance(item, Pot):
         return Pot(_val_div_(self._merge_(item)))
     elif isinstance(item, basestring):
         return self.normalize([item])
     elif isinstance(item, list):
         return self.normalize(item)
     elif isinstance(item, dict):
         intercept_dict = item
         var_list = colloc.intersect(self.vars(), intercept_dict.keys())
         return self.normalize(var_list).get_slice(intercept_dict)
     else:
         # the exception used to be built but never raised, so unsupported items silently returned None
         raise TypeError('Unknown item type')
Exemplo n.º 12
0
Arquivo: pstore.py Projeto: yz-/ut
def get_col_names(store, keys=None, singular_info='index_and_columns', print_results=False, style='dict'):
    '''
    Collect the index names and column names of the data stored under each key of a store.

    :param store: a HDFStore (anything offering ``keys()`` and ``store[key]`` returning a dataframe works)
    :param keys: list of keys to get info from (if present); if falsy, all keys of the store are used
    :param singular_info: one of 'index', 'columns' or 'index_and_columns': the single kind of names to
        keep for each key. If falsy, all three are kept, in a dict per key.
    :param print_results: if True, pretty-print the result (as it is before any dataframe conversion)
    :param style: 'dict' (default) to get a dict, 'dataframe' to get a dataframe with one row per key
    :return: a cols_info dict whose keys are the keys of the store and values are either the list of names
        selected by singular_info, or (when singular_info is falsy) a dict with
        'index', 'columns', and 'index_and_columns' which contain the data col names.
        With style='dataframe', a dataframe with one row per key holding the non-empty names
        (shorter rows are padded with '').
    '''
    # process inputs
    if not keys:
        keys = store.keys()
    else:
        keys = util_ulist.ascertain_list(keys)
        keys = colloc.intersect(keys, store.keys())
    # make a dict with col (and index) info
    cols_info = dict()
    for key in keys:
        df = store[key]
        index_names = list(df.index.names)
        column_names = list(df.columns)
        cols_info[key] = {
            'index': index_names,
            'columns': column_names,
            'index_and_columns': index_names + column_names,
        }
    if singular_info:
        # keep only the requested kind of names for each key
        all_info = cols_info
        cols_info = dict()
        for key in keys:
            cols_info[key] = all_info[key][singular_info]
    if print_results:
        PrettyPrinter(indent=2).pprint(cols_info)
    if style == 'dataframe':
        # one single-column frame per key, concatenated once (instead of re-concatenating in the loop);
        # the empty seed frame keeps the result an empty dataframe when there are no keys
        frames = [pd.DataFrame()]
        # .items() works on both Python 2 and 3, unlike .iteritems()
        for k, v in cols_info.items():
            v = [x for x in v if x]  # drop empty names (e.g. the None name of an unnamed index)
            frames.append(pd.DataFrame(data=v, columns=[k]))
        d = pd.concat(frames, axis=1)
        d = d.fillna(value='')
        cols_info = d.transpose()

    return cols_info
Exemplo n.º 13
0
def filter_columns(df, keep_only_columns_list):
    """
    Return df restricted to the columns given by colloc.intersect(df.columns, keep_only_columns_list).

    :param df: the dataframe to filter.
    :param keep_only_columns_list: the columns one wishes to keep.
    :return: df indexed by the resulting column selection.
    """
    kept_columns = colloc.intersect(df.columns, keep_only_columns_list)
    return df[kept_columns]
Exemplo n.º 14
0
Arquivo: manip.py Projeto: yz-/ut
def filter_columns(df, keep_only_columns_list):
    """
    Return df restricted to the columns given by colloc.intersect(df.columns, keep_only_columns_list).

    :param df: the dataframe to filter.
    :param keep_only_columns_list: the columns one wishes to keep.
    :return: df indexed by the resulting column selection.
    """
    kept_columns = colloc.intersect(df.columns, keep_only_columns_list)
    return df[kept_columns]