Exemplo n.º 1
0
# In[2]:

# Brand table; it is joined onto the device rows by 'device_id' further down.
device_brand = pd.read_csv('new_feature/device_brand.csv')

# Author's note: label = sex + age, one-hot encoded
# (presumably built inside LoadData -- TODO confirm).

# In[3]:

# Train data, test data
train_datapath, test_datapath = (
    '../Demo/deviceid_train.tsv',
    '../Demo/deviceid_test.tsv',
)
train_data, test_data = LoadData(train_datapath, test_datapath)

# In[4]:

# Drop the raw 'sex' and 'age' columns from the training frame
# (presumably they are already folded into 'label' -- TODO confirm).
# This mutates train_data in place, so re-running the cell raises KeyError.
train_data.drop(['sex', 'age'], axis=1, inplace=True)

# Give every test row a placeholder label value.
test_data['label'] = 'unknow'

# Stack train rows on top of test rows. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0; pd.concat produces the same frame (the
# original row indices of both inputs are kept).
data = pd.concat([train_data, test_data])

# Merge device_brand

# In[5]:

# Left join: every row of `data` is kept, with or without a brand match.
data = data.merge(device_brand, on='device_id', how='left')

# Replace all missing values (including rows without a brand match)
# with the 'unknow' placeholder. Rebinding avoids inplace=True.
data = data.fillna('unknow')

# In[6]:
Exemplo n.º 2
0
    fold_names = list(range(n_folds))
    fold_names.append('overall')
    # Dataframe of validation scores
    metrics = pd.DataFrame({
        'fold': fold_names,
        'train_logloss': train_logloss,
        'valid_logloss': valid_logloss
    })
    return metrics, out_of_fold, test_predictions


# In[ ]:

# Column labels that FeatureSelect flags for removal
# (the selection criteria live in FeatureSelect, outside this cell).
useless_feature = FeatureSelect(train_data)

# In[ ]:

# Drop the flagged columns from both splits; drop() returns new frames,
# so train_data and test_data themselves are left unchanged.
train = train_data.drop(columns=useless_feature)
test = test_data.drop(columns=useless_feature)

# In[ ]:

# Script form of the notebook line magic
#   %time metric, train_proba, test_proba = model(train, test, 'label', 22, 10)
# The statement runs in the notebook namespace, so the three names are
# available to later cells.
# NOTE(review): 22 and 10 are unexplained here -- presumably the number of
# classes and the number of folds; confirm against model's signature.
get_ipython().run_line_magic(
    'time', "metric, train_proba, test_proba = model(train, test, 'label', 22, 10)"
)

# In[ ]:

# Write the train / test probability arrays to .npy files under new_feature/.
np.save(file='new_feature/lgbcnt_train.npy', arr=train_proba)
np.save(file='new_feature/lgbcnt_test.npy', arr=test_proba)