Exemplo n.º 1
0
def data_loader(status, shuffle=False, validation=False, num_workers=2):
    """Build DataLoader(s) over the dataset selected by the global DATA_TYPE.

    Args:
        status: Passed unchanged to the selected ``Preprocessing`` method
            (presumably a split name such as 'train' -- confirm against
            ``Preprocessing``).
        shuffle: Forwarded to every ``DataLoader`` created here, including
            the dev loader when ``validation`` is true.
        validation: When true, the data is randomly split into a training
            part and a dev part, and one loader is returned for each.
        num_workers: Forwarded to every ``DataLoader`` created here.

    Returns:
        ``(train_loader, dev_loader)`` when ``validation`` is true, otherwise
        a single loader over the whole dataset.

    Raises:
        ValueError: If ``DATA_TYPE`` is not 'original', 'features' or 'trans'.
    """
    prepare_data = Preprocessing()
    if DATA_TYPE == 'original':
        data_X, data_y = prepare_data.original(status)
    elif DATA_TYPE == 'features':
        data_X, data_y = prepare_data.features(status)
    elif DATA_TYPE == 'trans':
        data_X, data_y = prepare_data.trans(status)
    else:
        # Fail loudly here instead of hitting an UnboundLocalError on data_X
        # a few lines further down.
        raise ValueError(
            "Unsupported DATA_TYPE {!r}; expected 'original', 'features' "
            "or 'trans'".format(DATA_TYPE)
        )

    def _make_loader(dataset):
        # Every loader built by this function shares the same settings.
        return DataLoader(dataset=dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=shuffle,
                          num_workers=num_workers)

    data = DealDataset(data_X, data_y)
    size = data.len

    if not validation:
        return _make_loader(data)

    # The first SPLIT_RATE fraction (rounded down) goes to training; the
    # remainder goes to dev, so the two sizes always sum to ``size``.
    train_size = int(size * SPLIT_RATE)
    train, dev = random_split(data, [train_size, size - train_size])

    # Re-wrap each split as a DealDataset built from its two components
    # (same order as the DealDataset(data_X, data_y) call above).
    train_items = train[:]
    dev_items = dev[:]
    train = DealDataset(train_items[0], train_items[1])
    dev = DealDataset(dev_items[0], dev_items[1])

    return _make_loader(train), _make_loader(dev)
Exemplo n.º 2
0
import pandas as pd
import numpy as np
from utils.Preprocessing import *


def load_data(file_path) -> pd.DataFrame:
    """Read a whitespace-delimited text file into a DataFrame.

    Args:
        file_path: Path (or any other source accepted by ``pandas.read_csv``)
            of the file to read. Fields are separated by one or more
            whitespace characters and the first row is used as the header
            (the ``read_csv`` default).

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    # Raw string: "\s" is an invalid escape sequence in a normal literal and
    # triggers a warning on modern Python versions.
    return pd.read_csv(file_path, sep=r"\s+")


# X = load_data("../data/train/X_train.txt")
# y = load_data("../data/train/y_train.txt")
# y = np.asarray(y.values)
# actionA = X.iloc[np.argwhere(y==5)[:,0]]
# print(len(actionA))

# Smoke check: run the 'trans' preprocessing for the train split and then the
# test split, printing the shape of the feature array returned for each.
a = Preprocessing()
for status in ('train', 'test'):
    X, Y = a.trans(status)
    print(X.shape)