Exemplo n.º 1
0
# ### Dimensionality reduction at different scales

# In[8]:

# Project every column after the first onto 100 truncated-SVD components,
# then attach the device_id values as an extra column of the result.
svd100 = TruncatedSVD(random_state=666, n_iter=15, n_components=100)
brand_100 = pd.DataFrame(svd100.fit_transform(data.iloc[:, 1:])).assign(
    device_id=data.device_id.values
)

# In[9]:

# Same projection as above but with 550 components; the reduced table is then
# left-joined onto both the train and the test frames by device_id.
svd550 = TruncatedSVD(random_state=666, n_iter=15, n_components=550)
brand_550 = pd.DataFrame(svd550.fit_transform(data.iloc[:, 1:])).assign(
    device_id=data.device_id.values
)
train, test = (
    split.merge(brand_550, how='left', on='device_id')
    for split in (train_data, test_data)
)

# In[14]:


def train_code(train_data, test_data, label, num_class, n_folds=5):
    labels = train_data[[label]]
    train_data = train_data.drop(['device_id', 'label'], axis=1)
    test_data = test_data.drop(['device_id', 'label'], axis=1)
    train_predvec = np.zeros((train_data.shape[0], num_class))
    test_predvec = np.zeros((test_data.shape[0], num_class))
    SKF = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=2018)
    for train_indices, valid_indices in SKF.split(train_data, labels):
        # Training data for the fold
        x_train = train_data.loc[train_indices, :]
Exemplo n.º 2
0
# Persist the feature table to features/h1.csv without writing the index.
features.to_csv('features/h1.csv', index=False)

# ## Load train_data, test_data

# In[17]:

# Paths of the train and test device-id TSV files.
train_datapath, test_datapath = (
    '../Demo/deviceid_train.tsv',
    '../Demo/deviceid_test.tsv',
)
# LoadData is defined elsewhere; its two results are kept as the train and
# test tables.
train_data, test_data = LoadData(train_datapath, test_datapath)

# ### First group of features

# In[18]:

# Left-join the feature table onto the train and test frames by device_id.
h1_train, h1_test = (
    frame.merge(features, how='left', on='device_id')
    for frame in (train_data, test_data)
)

# ## Xgboost

# In[19]:

import gc
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
import warnings
# Suppress all warnings from this point on.
warnings.filterwarnings('ignore')
# Notebook-only: enable inline matplotlib output via the IPython line magic.
# NOTE(review): get_ipython is only available inside IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')
Exemplo n.º 3
0
# Attach the device_id values from groupfeature as a column of ch_vector.
ch_vector['device_id'] = groupfeature.device_id.values


# In[10]:


# s_hour + c_hour: left-join ch_vector onto sh_vector by device_id, then save
# the combined table to features/h3.csv without the index.
sc_vector = sh_vector.merge(
    ch_vector,
    how='left',
    on='device_id',
)
sc_vector.to_csv('features/h3.csv', index=False)


# In[11]:


# Left-join the combined hour vectors onto the train and test frames.
train_set, test_set = (
    frame.merge(sc_vector, how='left', on='device_id')
    for frame in (train_data, test_data)
)


# train code

# In[12]:


def xgbc_code(train_data, test_data,label, num_class, n_folds=5,
              obj='multi:softprob', metric='mlogloss'):
    labels = train_data[[label]]
    train_data = train_data.drop(['device_id','sex','age','label'],axis=1)
    test_data = test_data.drop(['device_id'],axis=1)
    train_predvec = np.zeros((train_data.shape[0], num_class))
    test_predvec = np.zeros((test_data.shape[0], num_class))
Exemplo n.º 4
0
# In[11]:


# Load the train and test data.
train_datapath, test_datapath = (
    '../Demo/deviceid_train.tsv',
    '../Demo/deviceid_test.tsv',
)
# LoadData is defined elsewhere; its two results become the train/test tables.
train_data, test_data = LoadData(train_datapath, test_datapath)


# ------------------------
# ## Merge data

# In[12]:


# Left-join the label TF-IDF table onto both frames by device_id; the merged
# results replace the original train_data / test_data.
train_data, test_data = (
    frame.merge(label_tfidf, how='left', on='device_id')
    for frame in (train_data, test_data)
)


# ------------------------------------
# # Train code

# In[17]:


from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.metrics import log_loss
import warnings
import gc
Exemplo n.º 5
0
# ---------------

# ## Train/test

# In[6]:

# Paths of the train and test device-id TSV files.
train_path, test_path = (
    '../Demo/deviceid_train.tsv',
    '../Demo/deviceid_test.tsv',
)
# LoadData is defined elsewhere; its two results become the train/test tables.
train_data, test_data = LoadData(train_path, test_path)

# ### Merge(applist)

# In[7]:

# Left-join the per-device app list onto both frames by device_id; the merged
# results replace the original train_data / test_data.
train_data, test_data = [
    part.merge(device_applist, how='left', on='device_id')
    for part in (train_data, test_data)
]

# # MLPC

# In[8]:

from sklearn.model_selection import StratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import log_loss
import gc
import warnings
# Suppress all warnings from this point on.
warnings.filterwarnings('ignore')

# In[9]:
Exemplo n.º 6
0
# Read the previously saved h1 and h2 feature tables from disk.
h1, h2 = (
    pd.read_csv(csv_path)
    for csv_path in ('features/h1.csv', 'features/h2_cnt300.csv')
)

# In[ ]:

# Load the train and test data.
train_datapath, test_datapath = (
    '../Demo/deviceid_train.tsv',
    '../Demo/deviceid_test.tsv',
)
# LoadData is defined elsewhere; its two results become the train/test tables.
train_data, test_data = LoadData(train_datapath, test_datapath)

# Merge data

# In[ ]:

# Left-join each feature table, in this fixed order, onto both the train and
# the test frame by device_id. The two frames never interact, so interleaving
# their merges gives the same result as merging one frame fully and then the other.
for _feature_table in (applist, labelcnt, brand, h1, h2):
    train_data = train_data.merge(_feature_table, how='left', on='device_id')
    test_data = test_data.merge(_feature_table, how='left', on='device_id')
# Drop the loop variable so no extra name is left at module level.
del _feature_table

# Feature select

# In[ ]: