Exemplos de setdiff em Python, exemplos de ut.pcoll.order_conserving.setdiff em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: manip.py Projeto: yz-/ut

def hetero_concat(df_list):
    """
    Concatenate dataframes whose column sets may differ.

    Starting from the first element of df_list, each following dataframe is aligned with the
    running result: add_nan_cols is called on both sides with the columns the other side has
    (as computed by colloc.setdiff), the new dataframe is reordered to the running result's
    columns, and the two are concatenated with pd.concat.

    NOTE(review): the return value of add_nan_cols is discarded, so it presumably adds the columns
    in place -- which would mean the dataframes passed in df_list are modified. Confirm.

    :param df_list: sequence of dataframes; must be non-empty (df_list[0] is read unconditionally)
    :return: whatever replace_nans_with_spaces_in_object_columns returns for the concatenated dataframe
    """
    merged = df_list[0]
    for frame in df_list[1:]:
        # columns that frame has but the running result lacks
        add_nan_cols(merged, colloc.setdiff(frame.columns, merged.columns))
        # columns that the (now extended) running result has but frame lacks
        add_nan_cols(frame, colloc.setdiff(merged.columns, frame.columns))
        # align the column order before stacking
        aligned = frame[merged.columns]
        merged = pd.concat([merged, aligned])
    return replace_nans_with_spaces_in_object_columns(merged)

Exemplo n.º 2

0

Exibir arquivo

def hetero_concat(df_list):
    """
    Stack a list of dataframes with heterogeneous columns into a single dataframe.

    For every dataframe after the first, add_nan_cols is invoked on the accumulated dataframe and
    on the incoming one so that each receives the columns only the other has (column differences
    come from colloc.setdiff). The incoming dataframe is then selected in the accumulated column
    order and appended with pd.concat.

    NOTE(review): add_nan_cols is used only for its side effect here (its result is ignored), so
    the input dataframes are presumably mutated -- verify against add_nan_cols.

    :param df_list: non-empty sequence of dataframes (indexing df_list[0] fails on an empty one)
    :return: result of replace_nans_with_spaces_in_object_columns applied to the stacked dataframe
    """
    accumulated = df_list[0]
    remaining = df_list[1:]
    for incoming in remaining:
        only_in_incoming = colloc.setdiff(incoming.columns, accumulated.columns)
        add_nan_cols(accumulated, only_in_incoming)
        # computed after the call above on purpose: accumulated may have gained columns
        only_in_accumulated = colloc.setdiff(accumulated.columns, incoming.columns)
        add_nan_cols(incoming, only_in_accumulated)
        accumulated = pd.concat([accumulated, incoming[accumulated.columns]])
    return replace_nans_with_spaces_in_object_columns(accumulated)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: pot.py Projeto: yz-/ut

 def from_count_df_to_count(cls, count_df, count_col='pval'):
     """
     Creates a potential from a dataframe specifying point counts (the name of the count column
     is given by count_col).

     Every column other than count_col is treated as a variable: the dataframe is grouped by those
     columns, the counts are summed per group, and the result is passed through ch_col_names
     before being wrapped in a Pot.

     NOTE(review): cls is not used -- the result is always built with Pot directly.
     NOTE(review): ch_col_names(tb, 'pval', count_col) presumably renames count_col to 'pval';
     confirm against its signature.
     """
     var_cols = list(colloc.setdiff(count_df.columns, [count_col]))
     # keep only the variable columns plus the count column, then total the counts per point
     selected = count_df[var_cols + [count_col]]
     summed = selected.groupby(var_cols).sum().reset_index()
     return Pot(ch_col_names(summed, 'pval', count_col))

Exemplo n.º 4

0

Exibir arquivo

def semantics_term_stats_maker_mk_terms_df(df,
                                           text_cols,
                                           id_cols=None,
                                           tokenizer_re=tokenizer_re):
    """
    Build a dataframe with one row per (id columns, term) found in the text columns.

    For each column in text_cols, every cell is tokenized with re.findall(tokenizer_re, cell), the
    resulting token lists are stored in a 'term' column next to the id columns, and that column is
    rolled out with daf_manip.rollout_cols (presumably one row per token -- see that function).
    The per-column results are concatenated.

    :param df: input dataframe
    :param text_cols: column name or list of column names holding the text to tokenize
    :param id_cols: column name(s) to carry along with each term; defaults to every column of df
        that is not in text_cols. Requested id columns that are not columns of df are looked up
        in the (named) index via reset_index.
    :param tokenizer_re: pattern handed to re.findall to extract the terms
    :return: the concatenated dataframe, or an empty dataframe when text_cols is empty
    """
    text_cols = util_ulist.ascertain_list(text_cols)
    if id_cols is None:
        id_cols = colloc.setdiff(df.columns, text_cols)
    else:
        id_cols = util_ulist.ascertain_list(id_cols)
        id_cols_missing = colloc.setdiff(id_cols, df.columns)
        # len() instead of bare truthiness: unambiguous whether setdiff returns a list, array or Index
        if len(id_cols_missing) > 0:  # if any columns are missing, try to get them from named index
            df = df.reset_index(id_cols_missing)
    term_frames = []
    for c in text_cols:
        # explicit copy so that adding 'term' never writes into (a view of) the caller's dataframe
        d = df[id_cols].copy()
        # a real list (not a lazy map object) so the assignment also works on Python 3
        d['term'] = [re.findall(tokenizer_re, x) for x in df[c]]
        d = daf_manip.rollout_cols(d, cols_to_rollout='term')
        term_frames.append(d)
    if not term_frames:
        # same result as before when there is nothing to tokenize
        return pd.DataFrame()
    # concatenate once instead of growing a dataframe inside the loop
    return pd.concat(term_frames)

Exemplo n.º 5

0

Exibir arquivo

 def from_count_df_to_count(cls, count_df, count_col='pval'):
     """
     Creates a potential from a dataframe specifying point counts, where count_col names the
     column that holds the counts.

     All remaining columns act as the grouping variables; counts are summed within each group and
     the grouped table is handed to ch_col_names and then to Pot.

     NOTE(review): the potential is constructed with Pot, not cls (cls is unused here).
     NOTE(review): presumably ch_col_names maps count_col onto 'pval' -- verify.
     """
     grouping_cols = list(colloc.setdiff(count_df.columns, [count_col]))
     wanted_cols = grouping_cols + [count_col]
     totals = count_df[wanted_cols].groupby(grouping_cols).sum()
     table = ch_col_names(totals.reset_index(), 'pval', count_col)
     return Pot(table)

Exemplo n.º 6

0

Exibir arquivo

Arquivo: manip.py Projeto: yz-/ut

def gather_col_values(df,
                      cols_to_gather=None,
                      gathered_col_name='gathered_cols',
                      keep_cols_that_were_gathered=False,
                      remove_empty_values=True):
    """
    Gather, row by row, the values of several columns into a single list-valued column.

    The input dataframe is not modified; a copy is made before the new column is added.

    :param df: input dataframe
    :param cols_to_gather: columns whose values are gathered; None or an empty collection means
        all columns of df
    :param gathered_col_name: name of the column receiving the per-row lists
    :param keep_cols_that_were_gathered: if falsy, the gathered columns are removed from the
        result (column selection via colloc.setdiff)
    :param remove_empty_values: if truthy, falsy values (e.g. '', 0, None) are left out of each
        row's list
    :return: a new dataframe containing the gathered column
    """
    # `cols_to_gather or df.columns` raised "truth value is ambiguous" for Index/array arguments;
    # test for None / emptiness explicitly while keeping the "empty means all columns" fallback
    if cols_to_gather is None or len(cols_to_gather) == 0:
        cols_to_gather = df.columns
    df = df.copy()
    # itertuples yields (index, value, value, ...): drop the leading index entry
    rows = [list(row[1:]) for row in df[cols_to_gather].itertuples()]
    if remove_empty_values:
        rows = [[value for value in row if value] for row in rows]
    # always assign a real list: on Python 3 a map object is lazy and has no length
    df[gathered_col_name] = rows
    if not keep_cols_that_were_gathered:
        df = df[colloc.setdiff(df.columns, cols_to_gather)]
    return df

Exemplo n.º 7

0

Exibir arquivo

def gather_col_values(df,
                      cols_to_gather=None,
                      gathered_col_name='gathered_cols',
                      keep_cols_that_were_gathered=False,
                      remove_empty_values=True):
    """
    Collect the values of the given columns into one list per row, stored in a new column.

    Works on a copy of df, so the caller's dataframe is left untouched.

    :param df: input dataframe
    :param cols_to_gather: columns to gather; None means all columns of df
    :param gathered_col_name: name of the new list-valued column
    :param keep_cols_that_were_gathered: if falsy, the result keeps only the columns that
        colloc.setdiff reports as not being in cols_to_gather
    :param remove_empty_values: if truthy, falsy values (e.g. '', 0, None) are dropped from each
        row's list
    :return: a new dataframe containing the gathered column
    """
    if cols_to_gather is None:
        cols_to_gather = df.columns
    df = df.copy()
    # each tuple from itertuples starts with the row index, which is not a gathered value
    gathered = [list(row[1:]) for row in df[cols_to_gather].itertuples()]
    if remove_empty_values:
        # list comprehension rather than map(): map is lazy on Python 3 and cannot be assigned
        # to a column
        gathered = [[value for value in values if value] for values in gathered]
    df[gathered_col_name] = gathered
    if not keep_cols_that_were_gathered:
        df = df[colloc.setdiff(df.columns, cols_to_gather)]
    return df

Exemplo n.º 8

0

Exibir arquivo

def rollout_cols(df, cols_to_rollout=None):
    """
    Rolls out the values of cols_to_rollout so that each individual list (or other iterable)
    element is on its own row, with the values of the other columns repeated alongside them as in
    the original dataframe.
    Example:
    df =
        A   B
        1   [11,111]
        2   [22]
        3   [3,33,333]
    rollout_cols(df, cols_to_rollout='B') =
        A   B
        1   11
        1   111
        2   22
        3   3
        3   33
        3   333

    :param df: input dataframe
    :param cols_to_rollout: column name or list of names to roll out; when falsy, the columns
        returned by daf_diagnosis.cols_that_are_of_the_type(df, util_var.is_an_iter) are used
    :return: a new dataframe with the same columns, in the same order, as df
    """
    # no explicit selection: let daf_diagnosis pick the columns (presumably the iterable-valued ones)
    if not cols_to_rollout:
        cols_to_rollout = daf_diagnosis.cols_that_are_of_the_type(df, util_var.is_an_iter)
    cols_to_rollout = util_ulist.ascertain_list(cols_to_rollout)
    carried_cols = colloc.setdiff(df.columns, cols_to_rollout)
    # per-row list lengths are taken from the first rolled-out column only; the other
    # cols_to_rollout are assumed (not checked) to have matching lengths
    rollout_lengths = np.array(df[cols_to_rollout[0]].apply(len))
    # TODO: placeholder frame whose only job is to fix the number of rows; its single column is
    # left out by the final column selection below
    rollout_df = pd.DataFrame(range(np.sum(rollout_lengths)))
    # flatten the list-valued columns
    for col in cols_to_rollout:
        rollout_df[col] = np.concatenate(list(df[col]))
    # repeat every other column's value once per element of the corresponding list
    for col in carried_cols:
        repeated = [np.tile(value, (n_rows, 1)) for (value, n_rows) in zip(df[col], rollout_lengths)]
        try:
            rollout_df[col] = np.concatenate(repeated)
        except ValueError:
            # fallback when the tiled blocks cannot be concatenated/assigned as one array
            rollout_df[col] = list(chain(*repeated))
    # put the columns in their original order
    return rollout_df[df.columns]

Exemplo n.º 9

0

Exibir arquivo

Arquivo: manip.py Projeto: yz-/ut

def rollout_cols(df, cols_to_rollout=None):
    """
    Expands the list-valued (or otherwise iterable-valued) columns cols_to_rollout so that every
    element gets its own row; the values of all other columns are repeated next to the elements
    that came from their row.
    Example:
    df =
        A   B
        1   [11,111]
        2   [22]
        3   [3,33,333]
    rollout_cols(df, cols_to_rollout='B') =
        A   B
        1   11
        1   111
        2   22
        3   3
        3   33
        3   333

    :param df: input dataframe
    :param cols_to_rollout: name or list of names of the columns to expand; if falsy, falls back
        to daf_diagnosis.cols_that_are_of_the_type(df, util_var.is_an_iter)
    :return: a new dataframe whose columns are df.columns, in that order
    """
    selected = cols_to_rollout
    if not selected:
        # nothing requested: (try to) expand whatever daf_diagnosis reports for util_var.is_an_iter
        selected = daf_diagnosis.cols_that_are_of_the_type(df, util_var.is_an_iter)
    # make sure we are working with a list of column names
    selected = util_ulist.ascertain_list(selected)
    untouched = colloc.setdiff(df.columns, selected)
    # number of elements per row, read from the first selected column (the remaining selected
    # columns are trusted to agree)
    counts = np.array(df[selected[0]].apply(len))
    # TODO: the frame is seeded with a throwaway column just to get the right number of rows;
    # that column is not part of the returned selection
    expanded = pd.DataFrame(range(np.sum(counts)))
    # expanded columns: all lists laid end to end
    for name in selected:
        expanded[name] = np.concatenate(list(df[name]))
    # other columns: each value tiled as many times as its row's list is long
    for name in untouched:
        tiles = [np.tile(cell, (count, 1)) for (cell, count) in zip(df[name], counts)]
        try:
            expanded[name] = np.concatenate(tiles)
        except ValueError:
            # concatenation/assignment as a single array failed: fall back to a flat list of rows
            expanded[name] = list(chain.from_iterable(tiles))
    # restore the original column order
    return expanded[df.columns]

Exemplo n.º 10

0

Exibir arquivo

def rm_cols_if_present(df, cols):
    """
    Return df restricted to the columns that colloc.setdiff reports as not being in cols.

    :param df: input dataframe
    :param cols: column name or list of column names to leave out (normalized with
        util_ulist.ascertain_list); presumably names absent from df are simply ignored, as the
        function name suggests -- this depends on colloc.setdiff
    :return: the column selection df[...] of the remaining columns
    """
    unwanted = util_ulist.ascertain_list(cols)
    remaining = colloc.setdiff(df.columns, unwanted)
    return df[remaining]

Exemplo n.º 11

0

Exibir arquivo

Arquivo: manip.py Projeto: yz-/ut

def rm_cols_if_present(df, cols):
    """
    Select from df every column except those listed in cols.

    cols is first passed through util_ulist.ascertain_list, and the columns to keep are computed
    with colloc.setdiff(df.columns, cols).
    NOTE(review): tolerance of names that are not columns of df is implied by the function name
    but relies on colloc.setdiff -- confirm.

    :param df: input dataframe
    :param cols: column name or list of column names to drop
    :return: df indexed by the columns that are kept
    """
    cols_to_drop = util_ulist.ascertain_list(cols)
    cols_to_keep = colloc.setdiff(df.columns, cols_to_drop)
    selection = df[cols_to_keep]
    return selection

Exemplo n.º 12

0

Exibir arquivo

 def vars(self):
     """
     Return the column names of self.tb other than 'pval', as computed by colloc.setdiff.
     """
     table_columns = list(self.tb.columns)
     return colloc.setdiff(table_columns, ['pval'])

Exemplo n.º 13

0

Exibir arquivo

Arquivo: pot.py Projeto: yz-/ut

 def vars(self):
     """
     List the columns of self.tb with 'pval' excluded (difference taken with colloc.setdiff).
     """
     excluded = ['pval']
     all_columns = list(self.tb.columns)
     return colloc.setdiff(all_columns, excluded)