# In[361]:

# Load the GSE18549 expression table and keep only the raw values.
# The frame is transposed before `.values`, so the CSV's columns become the
# rows of the array (presumably samples x genes afterwards -- TODO confirm).
metastatic_exp = pd.read_csv(
    os.path.join(external_data_dir, "gse18549_expression.csv"),
    index_col=0,
).T.values

# Derive four rescaled variants of the same matrix so that their value ranges
# can be compared side by side in the following cells.
# NOTE(review): `scale`, `RobustScaler` and `normalize` are imported outside
# this excerpt -- presumably from sklearn.preprocessing; confirm.
metastatic_exp_scaled = scale(metastatic_exp)

# Robust scaling based on the 1st-99th percentile range rather than the
# scaler's default quantile range.
metastatic_exp_robust = RobustScaler(quantile_range=(1.0, 99.0)).fit_transform(
    metastatic_exp
)

# Normalisation applied to the raw matrix ...
metastatic_exp_normed = normalize(metastatic_exp)

# ... and to a freshly scaled copy of it.
metastatic_exp_scaled_normed = normalize(scale(metastatic_exp))

# In[362]:

# Print the global (min, max) of every variant, one line per variant, in the
# order: raw, scaled, robust, normed, scaled+normed.
# The chained `.min().min()` / `.max().max()` calls are kept exactly as
# written; on a NumPy array the second call is redundant because the first
# already reduces over all elements.
for exp_variant in (
        metastatic_exp,
        metastatic_exp_scaled,
        metastatic_exp_robust,
        metastatic_exp_normed,
        metastatic_exp_scaled_normed,
):
    print(exp_variant.min().min(), exp_variant.max().max())

# In[366]:

# Overlay density-normalised histograms of the flattened expression values
# for the raw, scaled and robust-scaled matrices (50 bins each).
# `density=True` replaces the `normed=True` keyword, which was deprecated in
# Matplotlib 2.1 and removed in 3.1; it applies the same normalisation, so
# each histogram integrates to 1.
# The return value is bound to `_` only to keep the (counts, bins, patches)
# tuple out of the cell output.
_ = plt.hist(metastatic_exp.flatten(), bins=50, density=True, label='raw')
_ = plt.hist(metastatic_exp_scaled.flatten(),
             bins=50,
             density=True,
             label='scale')
_ = plt.hist(metastatic_exp_robust.flatten(),
             bins=50,
             density=True,
             label='quantile')
示例#2
0
    X_scale=minmax_scale(X)
    
elif c==0:#不进行归一化
    X_scale=X
    
elif c==3:#鲁棒性归一化
    from sklearn.preprocessing import RobustScaler
    X_scale=RobustScaler().fit_transform(X)

# Report the scaled data and a few summary numbers, then continue the script
# with X rebound to the scaled array. (Python 2 print statements.)
print 'the standar result of X is:',X_scale
## Check X_scale; under normal circumstances the mean is 0 and the variance is 1
## NOTE(review): that expectation presumably applies to standard (z-score)
## scaling only; the minmax_scale, RobustScaler and pass-through branches
## above will generally give other values -- confirm.
#1. mean and standard deviation over all elements
print 'mean=',X_scale.mean()
print 'std=',X_scale.std()
#2. overall minimum and maximum
print 'min=',X_scale.min()
print 'max=',X_scale.max()
# csv_file1 is defined outside this excerpt (presumably the input CSV handle);
# it is closed here, after the scaled values have been printed.
csv_file1.close()

## For ease of understanding and simpler notation: from here on X is the scaled data
X=X_scale

## Statistics after normalization
## Get the statistics of X (`statistics` is a helper defined outside this excerpt)
statistics(X)
## Frequency distribution histogram
#drawHist(X,'AOD','Frequency','the Frequency of standar AOD')
## Cumulative frequency plot
#drawCumulativeHist(X,'AOD','Frequency','Curve cumulative of standar AOD')
## Box plot
#drawBox(X.reshape(264,),'AOD','BOX of standar AOD')