Python replace_cols_with_nan示例

编程语言: Python

命名空间/包名称: data_wrangling.dataframe_manager

方法/功能: replace_cols_with_nan

hotexamples.com的示例: 3

Python replace_cols_with_nan - 共找到 3 个示例。这些是从开源项目中提取的、评分最高的 data_wrangling.dataframe_manager.replace_cols_with_nan 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： correlation_stats.py 项目： abrarhayat/data_analysis_with_python

import pandas as pd
import data_wrangling.dataframe_manager as dm
import scipy.stats as scs

# Script: load the automobile CSV, clean the "price" and "horsepower" columns
# through the project's dataframe_manager helpers, then report their Pearson
# correlation.

# Let pandas print up to 200 columns before eliding any.
pd.set_option('display.max_columns', 200)

# Remote CSV location and the column names handed to dm.create_df.
location = "https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DA0101EN/auto.csv"
headers = [
    "symboling", "normalized-losses", "make", "fuel-type", "aspiration",
    "num-of-doors", "body-style", "drive-wheels", "engine-location",
    "wheel-base", "length", "width", "height", "curb-weight", "engine-type",
    "num-of-cylinders", "engine-size", "fuel-system", "bore", "stroke",
    "compression-ratio", "horsepower", "peak-rpm", "city-mpg", "highway-mpg",
    "price",
]

# Columns that are cleaned and then cast to float, in processing order.
numeric_cols = ("price", "horsepower")

df = dm.create_df(location, headers)
preview = dm.print_4_heads(df, "")
print(preview)

# The helpers' return values are discarded here, so this script relies on
# them updating df in place -- NOTE(review): confirm against dataframe_manager.
# A fresh list is passed to each call, exactly as the two literals did before.
dm.replace_cols_with_nan(df, list(numeric_cols))
dm.replace_cols_with_mean(df, list(numeric_cols))

for col in numeric_cols:
    df[col] = df[col].astype("float")

# PEARSON correlation between horsepower and price
pearson_coeff, p_value = scs.pearsonr(df['horsepower'], df['price'])
for label, value in (('pearson_coeff: ', pearson_coeff), ('p_value: ', p_value)):
    print(label, value)

示例#2

显示文件

文件： data_normalization.py 项目： abrarhayat/data_analysis_with_python

import pandas as pd
import data_wrangling.dataframe_manager as dm

# Script: load the automobile CSV, clean "price" and "peak-rpm" through the
# project's dataframe_manager helpers, cast them to float, and rescale "price"
# by its maximum.

# Let pandas print up to 200 columns before eliding any.
pd.set_option('display.max_columns', 200)

# Remote CSV location and the column names handed to dm.create_df.
location = "https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DA0101EN/auto.csv"
headers = [
    "symboling", "normalized-losses", "make", "fuel-type", "aspiration",
    "num-of-doors", "body-style", "drive-wheels", "engine-location",
    "wheel-base", "length", "width", "height", "curb-weight", "engine-type",
    "num-of-cylinders", "engine-size", "fuel-system", "bore", "stroke",
    "compression-ratio", "horsepower", "peak-rpm", "city-mpg", "highway-mpg",
    "price",
]
df = dm.create_df(location, headers)
preview = dm.print_4_heads(df, "")
print(preview)

# Clean the price and peak-rpm columns; df is rebound to whatever each helper
# returns.
print('\n Replacing rows where values are not available')
df = dm.replace_cols_with_nan(df, ['price', 'peak-rpm'])
df = dm.replace_cols_with_mean(df, ['price', 'peak-rpm'])

print('\nChanging dtype for price and peak-rpm from object to float64: ')
float_cols = ["price", "peak-rpm"]
df[float_cols] = df[float_cols].astype('float')
# NOTE: the original author left an alternative commented out here:
#   df[["price", "peak-rpm"]] = dm.change_col_types(df, ["price", "peak-rpm"], 'float')


# NORMALIZATION

# Simple Feature Scaling: divide every price by the largest price.
max_price = df['price'].max()
df['price'] = df['price'] / max_price
print(dm.print_4_heads(df, "normalizing the price with Simple Feature Scaling"))

# normalizing the peak-rpm with Min-Max Method (the excerpt ends before this step)

示例#3

显示文件

import pandas as pd
import data_wrangling.dataframe_manager as dm

# Script: load the automobile CSV, clean the "price" column through the
# project's dataframe_manager helpers, then demonstrate a group-by aggregation
# and a pivot of the aggregated result.

# Let pandas print up to 200 columns before eliding any.
pd.set_option('display.max_columns', 200)

# Remote CSV location and the column names handed to dm.create_df.
location = "https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DA0101EN/auto.csv"
headers = [
    "symboling", "normalized-losses", "make", "fuel-type", "aspiration",
    "num-of-doors", "body-style", "drive-wheels", "engine-location",
    "wheel-base", "length", "width", "height", "curb-weight", "engine-type",
    "num-of-cylinders", "engine-size", "fuel-system", "bore", "stroke",
    "compression-ratio", "horsepower", "peak-rpm", "city-mpg", "highway-mpg",
    "price"
]
df = dm.create_df(location, headers)
print(dm.print_4_heads(df, ""))

# The helpers' return values are discarded here, so this script relies on
# them updating df in place -- NOTE(review): confirm against dataframe_manager.
dm.replace_cols_with_nan(df, ["price"])
dm.replace_cols_with_mean(df, ["price"])
df["price"] = df["price"].astype("float")

# GROUP BY: mean price for every (drive-wheels, body-style) combination.
print('\n')
print("GROUP BY DEMO")
df_test = df[['drive-wheels', 'body-style', 'price']]
df_grp = df_test.groupby(['drive-wheels', 'body-style']).mean()
print(df_grp)

# PIVOT: drive-wheels as rows, body-style as columns.
print('\n')
print("PIVOT Table DEMO")
# groupby() with the default as_index=True moves the grouping keys into the
# index, and DataFrame.pivot only accepts column labels for index=/columns=.
# Calling pivot on df_grp directly raises KeyError, so the keys are turned
# back into ordinary columns first.
df_pivot = df_grp.reset_index().pivot(index='drive-wheels', columns='body-style')
print(df_pivot)