Python Normalizer.renormalize примеры использования

Язык программирования: Python

Пространство имен/Пакет: normalize

Класс/Тип: Normalizer

Метод/Функция: renormalize

Примеров на hotexamples.com: 1

Python Normalizer.renormalize - 1 пример найден. Это лучшие примеры Python кода для normalize.Normalizer.renormalize, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Normalizer(17)

normalize(4)

load_csv(2)

get_new_actors(1)

get_record_actor_new(1)

get_splitedData(1)

normalization(1)

normalizeArray(1)

normalize_dir(1)

normalized_letters(1)

renormalize(1)

set_records(1)

split(1)

stats(1)

Пример #1

Показать файл

def run_pipeline(input_csv='worldbank-data/WDI_Data.csv',
                 output_csv='Predictions.csv'):
    """Fit a one-step-ahead model on World Bank indicators and save forecasts.

    The steps, in order:

    1. Read ``input_csv``, index it by ``('Country Name', 'Indicator Name')``,
       convert it to a panel, drop the ``'Indicator Code'`` and
       ``'Country Code'`` items and swap the first two axes.
    2. Keep only the indicators listed in ``indicators_to_use`` and the
       countries for which ``find_null_percentage`` is below 0.7.
    3. Normalize the panel with ``Normalizer``.
    4. Fill missing training values with samples from ``iterative_fill``
       driven by ``dense_generative_model`` (existing values are kept).
    5. Fit ``dense_gradient_model`` on consecutive-year pairs.
    6. Predict the validation and future years one at a time, appending each
       predicted year to the training panel before predicting the next.
    7. Print the ``rmse`` between normalized actuals and predictions for the
       validation years, restricted to ``target_variables``.
    8. Revert the normalization and write the target countries/variables to
       ``output_csv``.

    The function relies on names provided by the enclosing module: ``pd``,
    ``Normalizer``, ``find_null_percentage``, ``generate_year_list``,
    ``dense_generative_model``, ``iterative_fill``, ``dense_gradient_model``
    and ``rmse``.

    NOTE(review): this uses the legacy pandas ``Panel`` API (``to_panel``,
    ``.ix``), so it presumably needs a pandas release that still ships
    ``Panel`` -- confirm the pinned version.

    Args:
        input_csv: Path of the World Bank WDI CSV file to read.
        output_csv: Path of the CSV file the predictions are written to.

    Returns:
        None. Results are printed and written to ``output_csv``.
    """
    # get training data
    training_data = pd.read_csv(input_csv)
    training_data.set_index(['Country Name', 'Indicator Name'], inplace=True)

    # convert to panel
    panel = training_data.to_panel()
    panel.drop(['Indicator Code', 'Country Code'], axis=0, inplace=True)
    panel = panel.swapaxes(0, 1)

    indicators_to_use = [
        'Agriculture, value added (% of GDP)',
        'Industry, value added (% of GDP)',
        'Services, etc., value added (% of GDP)',
        'Domestic credit provided by financial sector (% of GDP)',
        'GDP growth (annual %)', 'GDP (current US$)', 'Expense (% of GDP)',
        'Inflation, consumer prices (annual %)',
        'Inflation, GDP deflator (annual %)',
        'Total debt service (% of exports of goods, services and primary income)',
        'Current account balance (BoP, current US$)',
        'External balance on goods and services (% of GDP)',
        'Health expenditure, total (% of GDP)', 'Tax revenue (% of GDP)',
        'Gross capital formation (% of GDP)', 'Gross savings (% of GDP)',
        'Net investment in nonfinancial assets (% of GDP)',
        'Bank capital to assets ratio (%)',
        'Bank nonperforming loans to total gross loans (%)',
        'Broad money (% of GDP)',
        'Commercial bank branches (per 100,000 adults)',
        'Deposit interest rate (%)', 'Real interest rate (%)',
        'Risk premium on lending (lending rate minus treasury bill rate, %)',
        'Total reserves (includes gold, current US$)',
        'Unemployment, total (% of total labor force) (modeled ILO estimate)',
        'Interest rate spread (lending rate minus deposit rate, %)'
    ]
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print('%d indicators used' % len(indicators_to_use))
    panel = panel[:, :, indicators_to_use]

    target_variables = [
        'Agriculture, value added (% of GDP)',
        'Industry, value added (% of GDP)',
        'Services, etc., value added (% of GDP)', 'GDP growth (annual %)',
        'Inflation, GDP deflator (annual %)',
        'Gross capital formation (% of GDP)', 'Gross savings (% of GDP)',
        'Bank capital to assets ratio (%)',
        'Bank nonperforming loans to total gross loans (%)',
        'Deposit interest rate (%)', 'Real interest rate (%)',
        'Risk premium on lending (lending rate minus treasury bill rate, %)',
        'Unemployment, total (% of total labor force) (modeled ILO estimate)',
        'Interest rate spread (lending rate minus deposit rate, %)'
    ]

    # drop useless countries such as samoa, lesoto and so on: keep a country
    # only when find_null_percentage reports less than 0.7 for its slice.
    useful_countries = [
        country for country in panel.axes[0]
        if find_null_percentage(panel[country, :, :]) < 0.7
    ]
    panel = panel.ix[useful_countries, :, :]

    normalizer = Normalizer(panel)
    normalized_panel = normalizer.normalize(panel)

    # #visualize normalization:
    # for indicator in normalized_panel.axes[2]:
    #     plot_hist(indicator, [panel, normalized_panel])

    # select train data
    years_to_validate = 1
    years_to_predict = 10
    years_train = generate_year_list(stop=2016 - years_to_validate)
    years_val = generate_year_list(start=2016 - years_to_validate + 1)
    years_predict = generate_year_list(start=2017,
                                       stop=2016 + years_to_predict)
    # Copy so that filling and appending years below does not touch
    # normalized_panel, which is still needed for scoring.
    train_panel = normalized_panel[:, years_train, :].copy()

    # fill missing values:
    # either banal mean or median filling
    # or sampling with a generative bidirectional LSTM - see https://arxiv.org/abs/1306.1091

    generative_model = dense_generative_model(train_panel,
                                              hidden_layers=[120],
                                              epochs=100)
    sampled_filled_values = iterative_fill(generative_model,
                                           train_panel,
                                           normalizer,
                                           iterations=50,
                                           burn_in=10)
    # overwrite=False: only missing entries are taken from the samples.
    train_panel.update(sampled_filled_values, overwrite=False)
    # or
    # train_panel.fillna(0, inplace=True)
    # or
    # train_panel = iterative_fill_bLSTM(train_panel)
    # or
    # filled_panel = fill_missing_bLSTM(train_panel, epochs=100)
    # train_panel.update(filled_panel, overwrite=False)
    # or
    # interpolate(train_panel)

    # create 1-step-ahead model
    epochs = 200
    hl = [100, 100]
    print('ARCHITECTURE: %s' % (hl,))
    print('EPOCHS: %d' % epochs)
    # Inputs are every training year but the last; targets are the same
    # series shifted forward by one year.
    X_train = train_panel[:, years_train, :][:, :-1, :]
    y_train = train_panel[:, years_train, :][:, 1:, :]
    model = dense_gradient_model(X_train,
                                 y_train,
                                 hidden_layers=hl,
                                 d=0.2,
                                 patience=50,
                                 epochs=epochs)

    # finally, predict
    for start, year in enumerate(years_val + years_predict):
        # The window slides forward by one year per iteration; only the
        # prediction for its last step is kept.
        window = train_panel[:, start + 1:, :].values
        predictions = model.predict(window)[:, -1, :]
        # Swap so that years are the item axis, add the predicted year as a
        # new item, then swap back.
        train_panel = train_panel.swapaxes(0, 1)
        new_year_df = pd.DataFrame(data=predictions,
                                   index=train_panel.axes[1],
                                   columns=y_train.axes[2])
        train_panel[year] = new_year_df
        train_panel = train_panel.swapaxes(0, 1)

    score = rmse(
        normalized_panel[:, years_val, target_variables].values,
        train_panel[:, years_val, target_variables].values)
    print('score: %s' % (score,))

    # revert to original scale and distributions
    train_panel = normalizer.renormalize(train_panel)

    # convert to dataframe, and write relevant information to file
    target_countries = ['Bulgaria', 'Cyprus', 'Albania']
    train_panel = train_panel.swapaxes(0, 1)
    df = train_panel[:, target_countries,
                     target_variables].to_frame(filter_observations=False)
    df.to_csv(output_csv)