import pandas as pd
import numpy as np
#from sklearn.metrics import roc_auc_score
import gc
gc.enable()
from dtypes import dtypes
import time
from sklearn.preprocessing import LabelEncoder

# Select every column whose declared dtype is not 'category'.
# The comparison uses != rather than `is not`: identity between string
# literals only holds through interpreter-level interning and triggers a
# SyntaxWarning on Python 3.8+.
# Assumes the values in `dtypes` are dtype name strings -- TODO confirm.
num_columns = [key for key, value in dtypes.items() if value != 'category']
# Build the dtype mapping from the same selection so both stay in sync.
num_types = {key: dtypes[key] for key in num_columns}

print('Load numeric features')
train = pd.read_csv('train.csv',
                    dtype=num_types,
                    usecols=num_columns,
                    low_memory=True,
                    nrows=None)

# 'HasDetections' is dropped from the column list only for the test read
# (presumably because test.csv has no label column -- verify), then appended
# again so num_columns still names it afterwards (now as the last entry).
num_columns.remove('HasDetections')
test = pd.read_csv('test.csv',
                   dtype=num_types,
                   usecols=num_columns,
                   low_memory=True,
                   nrows=None)
num_columns += ['HasDetections']

# Replace missing values with 0 in both frames.
train = train.replace(np.nan, 0)
test = test.replace(np.nan, 0)
import pandas as pd
import numpy as np
#from sklearn.metrics import roc_auc_score
import gc
gc.enable()
from dtypes import dtypes
import time
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Embedding

# Select every column whose declared dtype is 'category'.
# The comparison uses == rather than `is`: identity between string literals
# only holds through interpreter-level interning and triggers a SyntaxWarning
# on Python 3.8+.
# Assumes the values in `dtypes` are dtype name strings -- TODO confirm.
cat_columns = [key for key, value in dtypes.items() if value == 'category']
cat_types = {key: dtypes[key] for key in cat_columns}

# 'MachineIdentifier' is excluded from both the column list and the dtype
# mapping, so it is neither read nor typed below.
cat_columns.remove('MachineIdentifier')
cat_types.pop('MachineIdentifier', None)
# 'HasDetections' is not removed here; the numeric section earlier in this
# file removes it from the non-category list, so it is not expected in
# cat_columns.

# NOTE(review): `train` and `test` are rebound here, so the numeric frames
# loaded earlier in this file are discarded -- confirm this is intended.
print('Load category features')
train = pd.read_csv('train.csv',
                    dtype=cat_types,
                    usecols=cat_columns,
                    low_memory=True,
                    nrows=None)
test = pd.read_csv('test.csv',
                   dtype=cat_types,
                   usecols=cat_columns,
                   low_memory=True,
                   nrows=None)

print('Transform category features')
# Progress counter and total; not used within this section
# (presumably consumed by a later per-column loop -- verify).
col = 1
total_cols = len(cat_columns)

# Replace missing values with 0 in both frames.
# NOTE(review): how replace() treats NaN in categorical columns may vary
# between pandas versions -- confirm on the version in use.
train = train.replace(np.nan, 0)
test = test.replace(np.nan, 0)