Пример #1
0
def test()->tuple:
    """
    Read the test dataset located at PATH_TEST.

    The file is parsed by csv2df, called with an empty ltarget list and
    'PassengerId' as the only lindex entry.
    return -- tuple(dataframe data, dictionary columns), as unpacked from csv2df
    """
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # delegate the parsing to the shared CSV reader
    data, columns_info = csv2df(PATH_TEST, ltarget=[], lindex=['PassengerId'])
    return data, columns_info
Пример #2
0
def train()->tuple:
    """
    Read the training dataset located at PATH_TRAIN.

    The file is parsed by csv2df, called with 'Survived' as the only ltarget
    entry and 'PassengerId' as the only lindex entry.
    return -- tuple(dataframe data, dictionary columns), as unpacked from csv2df
    """
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # delegate the parsing to the shared CSV reader
    data, columns_info = csv2df(
        PATH_TRAIN,
        ltarget=['Survived'],
        lindex=['PassengerId'],
    )
    return data, columns_info
Пример #3
0
def load()->tuple:
    """
    Read the weather dataset located at PATH (no target variable is requested).

    csv2df receives 'dt' as the only lindex entry together with a ddt
    specification naming the 'dt' column and the '%Y-%m-%d %H:%M:%S' format.
    return -- tuple(dataframe data, dictionary columns), as unpacked from csv2df
    """
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # datetime specification handed to the reader through its ddt argument
    datetime_spec = dict(lcol=['dt'], sformat='%Y-%m-%d %H:%M:%S')
    # delegate the parsing to the shared CSV reader
    data, columns_info = csv2df(PATH, lindex=['dt'], ddt=datetime_spec)
    return data, columns_info
Пример #4
0
def load() -> tuple:
    """
    Read the wine dataset located at PATH (no target variable is requested).

    After reading, Proline and Magnesium are cast to float and Alcohol to int;
    a fresh `columns` object is then filled from the re-typed dataframe.
    return -- tuple(dataframe data, columns object filled through its get method)
    """
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # the column information returned by the reader is discarded: it is
    # rebuilt below once the dtypes have been adjusted
    frame, _ = csv2df(PATH)
    # cast the columns one by one, in the same order as listed here
    for name, kind in (('Proline', float), ('Magnesium', float), ('Alcohol', int)):
        setattr(frame, name, getattr(frame, name).astype(kind))
    # rebuild the column information from the re-typed dataframe
    summary = columns()
    summary.get(frame)
    return frame, summary
Пример #5
0
def load() -> tuple:
    """
    Read the solar dataset located at PATH.

    csv2df receives 'y' and 'cy' as ltarget, 'dt' as the only lindex entry and
    a ddt specification naming the 'dt' column and the '%Y-%m-%d %H:%M:%S'
    format. The 'cy' column is then cast to int and a fresh `columns` object
    is filled from the re-typed dataframe.
    return -- tuple(dataframe data, columns object filled through its get method)
    """
    target_names = ('y', 'cy')
    # tell the user that loading has started
    click.secho('Load data..', fg='green')
    # datetime specification handed to the reader through its ddt argument
    datetime_spec = dict(lcol=['dt'], sformat='%Y-%m-%d %H:%M:%S')
    # the column information returned by the reader is discarded: it is
    # rebuilt below once the dtype has been adjusted
    frame, _ = csv2df(
        PATH,
        ltarget=list(target_names),
        lindex=['dt'],
        ddt=datetime_spec,
    )
    frame.cy = frame.cy.astype(int)
    # rebuild the column information; a new list is passed so that no list
    # object is shared with the reader call above
    summary = columns()
    summary.get(frame, list(target_names))
    return frame, summary
Пример #6
0
        quit('Aborted!')

    # fit, transform and return
    return transformer.full_pipeline.fit_transform(df)


def numerical(df: 'dataframe')->'array':
    """
    Launch a pre-processing Pipeline with only numerical variables.

    df -- data to be transformed; it must not contain NaN values.
    return -- result of transformer.num_pipeline.fit_transform(df).
    raise -- SystemExit('Aborted!') when df contains at least one NaN value;
             the NaN counts are printed before exiting.
    """
    # local import: `quit` is an interactive helper injected by the `site`
    # module and is not guaranteed to exist (e.g. `python -S`, frozen apps);
    # sys.exit raises the same SystemExit and is always available
    import sys

    # count the NaN values once and reuse the result for the check and report
    nan_counts = df.isnull().sum()
    if nan_counts.sum() > 0:
        click.secho('[error] the dataframe to be transformated contains NaN values.', fg='red', bold=True)
        print(nan_counts)
        sys.exit('Aborted!')

    # fit, transform and return
    return transformer.num_pipeline.fit_transform(df)


if __name__ == '__main__':
    # manual smoke run of `full` on a small sample of the weather dataset
    from tools import reader

    data, dcol = reader.csv2df(
        '../../datasets/dataset.weather.csv',
        lindex=['datetime'],
    )
    # take the first entry of dcol['lc_float'] and the first of dcol['lc_cat']
    sample_columns = dcol['lc_float'][:1] + dcol['lc_cat'][:1]
    dfX = data[sample_columns]
    # drop rows with missing values, keep the leading rows, then transform
    X = full(dfX.dropna().head())
    # display at most the first five columns of the transformed sample
    print(X[:, :5])