Exemplo n.º 1
0
def process_pandas(data, endog_idx=0, exog_idx=None, index_idx=None):
    """
    Split a DataFrame into endogenous/exogenous parts and wrap them in a Dataset.

    Parameters
    ----------
    data : DataFrame
        Source table; its ``columns`` are used to translate positions to labels.
    endog_idx : int or column selector, default 0
        An ``int`` is the position of the single endogenous column, which is
        extracted as a Series. Any other value is handed to
        ``data.loc[:, endog_idx]`` and must select a DataFrame.
    exog_idx : None, int or sequence of int, optional
        Position(s) of the exogenous column(s). ``None`` means "every column
        that is not endogenous".
    index_idx : int, optional
        Position of the column whose values become the index of ``endog``,
        ``exog`` and ``data``.

    Returns
    -------
    Dataset
        Built with ``data``, ``names``, ``endog``, ``exog``, ``endog_name`` and
        ``exog_name`` keywords. ``endog_name`` is a scalar label for an int
        ``endog_idx`` and a list of labels otherwise; ``exog_name`` is always
        a list of labels.
    """
    names = data.columns

    if isinstance(endog_idx, int):
        endog_name = names[endog_idx]
        endog = data[endog_name].copy()
        endog_labels = [endog_name]
    else:
        endog = data.loc[:, endog_idx].copy()
        endog_name = list(endog.columns)
        endog_labels = endog_name

    # The exog selection does not depend on how endog was specified, so it is
    # done once instead of being repeated in every endog branch.
    if exog_idx is None:
        exog = data.drop(endog_labels, axis=1)
    else:
        exog = data[names[exog_idx]].copy()

    if index_idx is not None:  # NOTE: will have to be improved for dates
        index = Index(data.iloc[:, index_idx])
        endog.index = index
        exog.index = index.copy()
        data = data.set_index(names[index_idx])

    # A single int ``exog_idx`` selects a Series, which has no ``columns``;
    # report its name as a one-element list so ``exog_name`` is always a list.
    if hasattr(exog, "columns"):
        exog_name = list(exog.columns)
    else:
        exog_name = [exog.name]

    dataset = Dataset(data=data, names=list(names), endog=endog,
                      exog=exog, endog_name=endog_name, exog_name=exog_name)
    return dataset
Exemplo n.º 2
0
def process_pandas(data, endog_idx=0, exog_idx=None, index_idx=None):
    """
    Split a DataFrame into endogenous/exogenous parts and wrap them in a Dataset.

    Parameters
    ----------
    data : DataFrame
        Source table; its ``columns`` are used to translate positions to labels.
    endog_idx : int, long or column selector, default 0
        An integer is the position of the single endogenous column, which is
        extracted as a Series. Any other value is handed to
        ``data.loc[:, endog_idx]`` and must select a DataFrame.
    exog_idx : None, int or sequence of int, optional
        Position(s) of the exogenous column(s). ``None`` means "every column
        that is not endogenous".
    index_idx : int, optional
        Position of the column whose values become the index of ``endog``,
        ``exog`` and ``data``.

    Returns
    -------
    Dataset
        Built with ``data``, ``names``, ``endog``, ``exog``, ``endog_name`` and
        ``exog_name`` keywords. ``endog_name`` is a scalar label for an integer
        ``endog_idx`` and a list of labels otherwise; ``exog_name`` is always
        a list of labels.
    """
    names = data.columns

    if isinstance(endog_idx, (int, long)):
        endog_name = names[endog_idx]
        endog = data[endog_name].copy()
        endog_labels = [endog_name]
    else:
        endog = data.loc[:, endog_idx].copy()
        endog_name = list(endog.columns)
        endog_labels = endog_name

    # The exog selection does not depend on how endog was specified, so it is
    # done once instead of being repeated in every endog branch.
    if exog_idx is None:
        exog = data.drop(endog_labels, axis=1)
    else:
        exog = data[names[exog_idx]].copy()

    if index_idx is not None:  # NOTE: will have to be improved for dates
        index = Index(data.iloc[:, index_idx])
        endog.index = index
        exog.index = index.copy()
        data = data.set_index(names[index_idx])

    # A single integer ``exog_idx`` selects a Series, which has no ``columns``;
    # report its name as a one-element list so ``exog_name`` is always a list.
    if hasattr(exog, "columns"):
        exog_name = list(exog.columns)
    else:
        exog_name = [exog.name]

    dataset = Dataset(data=data, names=list(names), endog=endog,
                      exog=exog, endog_name=endog_name, exog_name=exog_name)
    return dataset
Exemplo n.º 3
0
def sort_columns_based_first_character(columns: pd.Index,
                                       list_order: List[str]) -> np.ndarray:
    """
    Reorder column names by their leading characters, following list_order.

    Each column is ranked by the position in ``list_order`` of the longest
    prefix it starts with. Columns matching no prefix are placed after all
    matched ones. Ties are broken by column name.

    Params::
        columns: column names to reorder
        list_order: prefixes, in the desired output order

    Prefixes are matched literally (not as regular expressions), so
    characters such as ``.`` or ``+`` in a prefix only match themselves.
    """
    df_col = pd.DataFrame(columns.values, columns=["colname"])
    df_col["order"] = np.nan  # NaN marks "no rank assigned yet"

    # Try longer prefixes first so that e.g. "ab" claims "ab_1" before "a"
    # can; the rank stored is still the prefix's position in list_order.
    ranked_prefixes = sorted(enumerate(list_order),
                             key=lambda pair: len(pair[1]), reverse=True)
    for rank, prefix in ranked_prefixes:
        unassigned = df_col["order"].isna()
        matches = df_col["colname"].str.startswith(prefix, na=False)
        df_col.loc[matches & unassigned, "order"] = rank

    # Unmatched columns share one rank just past the largest assigned rank
    # (0 when nothing matched at all).
    fallback = df_col["order"].fillna(-1).max() + 1
    df_col["order"] = df_col["order"].fillna(fallback)

    return df_col.sort_values(by=["order", "colname"])["colname"].values
Exemplo n.º 4
0
def nanmap(columns: pd.Index, dict_map: dict) -> np.ndarray:
    """
    Rename columns through dict_map, leaving unmapped names unchanged.

    ``columns.map(dict_map)`` yields NaN for every name that is not a key of
    ``dict_map``; this helper keeps the original name in those positions
    instead. A name whose mapped value is itself missing (NaN/None) is also
    left unchanged.

    The input ``columns`` is never modified; a new array is returned.
    """
    mapped = columns.map(dict_map)
    # Index.copy() is shallow by default, so take an owned array to avoid
    # writing through to the caller's Index.
    result = columns.to_numpy(copy=True)
    # Detect hits via the missing-value mask rather than a string sentinel,
    # so no legitimate target name can be mistaken for "unmapped".
    hit = mapped.notna()
    result[hit] = mapped.to_numpy()[hit]
    return result
Exemplo n.º 5
0
    def from_pandas_index(cls, index: pd.Index, dim: Hashable):
        """Build an instance of ``cls`` plus its index variable from a pandas index.

        If ``index`` has no name, a copy named ``dim`` is used so the caller's
        object is not renamed. Returns a 2-tuple: ``cls(index, dim)`` and a
        one-entry dict mapping the index name to an ``IndexVariable`` created
        on dimension ``dim``.
        """
        from .variable import IndexVariable

        name = index.name
        if name is None:
            # Rename a copy rather than the object handed in by the caller.
            index = index.copy()
            index.name = name = dim

        adapter = PandasIndexingAdapter(index)
        variables = {name: IndexVariable(dim, adapter, fastpath=True)}

        return cls(index, dim), variables