Example #1
def __init__(self, file_path, transforms=None):
    self.dl = DataLoader()
    self.file_path = file_path
    # one entry per machine (33 in total); presumably the row count of each machine's data file
    self.l = [346449, 344698, 347650, 344969, 342916, 346811, 346365, 346392, 344410, 344082, 345631, 345813, 350407, 347874, 351165, 340722, 339552, 341436, 345346, 347898, 344836, 345878, 345580, 347927, 346940, 350253, 347506, 346421, 348828, 346186, 347325, 339731, 346873]
    self.current_mn = None  # machine number whose data is currently cached
    self.data = None  # cached data for that machine
    self.transforms = transforms
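
Only the constructor appears in this excerpt; a map-style PyTorch dataset also needs `__len__` and `__getitem__`. A minimal sketch, assuming `self.l` holds per-machine row counts and that `self.dl(self.file_path, mn)` loads one machine's data (both are assumptions inferred from the later examples):

import bisect
from itertools import accumulate

def __len__(self):
    # total number of rows across all machines
    return sum(self.l)

def __getitem__(self, idx):
    # map a global row index to (machine number, local row index)
    bounds = list(accumulate(self.l))
    mn = bisect.bisect_right(bounds, idx)
    local = idx - (bounds[mn - 1] if mn > 0 else 0)
    if self.current_mn != mn:  # lazily cache one machine's data at a time
        self.data = self.dl(self.file_path, mn)
        self.current_mn = mn
    row = self.data[local]
    if self.transforms is not None:
        row = self.transforms(row)
    return row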
Example #2
def main():
    # 1. Preprocessing metadata
    dl = DataLoader()
    pp = None
    if not os.path.exists("meta.pkl"):  # no saved preprocessing metadata yet
        pp = PreProc(dl.t[1:])
        for machine_num in range(33):
            raw_data = dl(para.train_data, machine_num)  # load data
            data = raw_data[:, 1:]  # drop the time column
            pp.fit(data)
            print('Machine {0} processed.'.format(str(machine_num + 1)), flush=True)
        # save the preprocessing metadata
        with open("meta.pkl", 'wb') as output_hal:
            pickle.dump(pp, output_hal)
        print('Preprocessing metadata saved.', flush=True)
    else:  # load the preprocessing metadata
        with open("meta.pkl", 'rb') as file:
            pp = pickle.load(file)
        print('Preprocessing metadata loaded.', flush=True)

    # 2. Batch the data
    dataset = MyDataset(para.train_data)
    dataset_loader = torch.utils.data.DataLoader(dataset=dataset,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False)

    # 3. Model
    net2 = torch.nn.Sequential(torch.nn.Dropout(0.03),
                               torch.nn.Linear(141, 100), torch.nn.ReLU(),
                               torch.nn.Linear(100, 200), torch.nn.ReLU(),
                               torch.nn.Linear(200, 100), torch.nn.ReLU(),
                               torch.nn.Linear(100, 141))
    net2.double()
    optimizer = torch.optim.Adam(net2.parameters(), lr=LEARNING_RATE)
    loss_func = torch.nn.MSELoss()
    loss_list = []

    # 4. Training
    for epoch in range(EPOCH):
        print('epoch {0} start'.format(epoch), flush=True)
        for step, batch_x in tqdm(enumerate(dataset_loader),
                                  total=len(dataset_loader)):
            net2.zero_grad()
            batch_x = pp.transform(batch_x)
            prediction = net2(batch_x)
            loss = loss_func(prediction, batch_x)
            loss.backward()

            optimizer.step()
            if loss.item() > 10:
                # `raw_data` is only defined when the metadata was rebuilt
                # above, so print the offending batch itself
                print(batch_x[0])
                break
            loss_list.append(loss.item())  # store a scalar, not the graph-carrying tensor
    plt.plot(loss_list)
    plt.show()
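
The trained weights are never persisted in this example. A minimal follow-up sketch using PyTorch's standard `state_dict` API (the file name `net2.pt` is hypothetical):

# save the trained autoencoder weights (file name is hypothetical)
torch.save(net2.state_dict(), "net2.pt")

# later: rebuild the same architecture, then restore the weights
net2.load_state_dict(torch.load("net2.pt"))
net2.eval()  # disable dropout for inference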
Example #3
        x = self.predict(x)
        return x


net2 = torch.nn.Sequential(torch.nn.Dropout(0.1), torch.nn.Linear(84, 100),
                           torch.nn.ReLU(), torch.nn.Linear(100, 200),
                           torch.nn.ReLU(), torch.nn.Linear(200, 100),
                           torch.nn.ReLU(), torch.nn.Linear(100, 84))
net3 = torch.nn.Sequential(torch.nn.Dropout(0.1), torch.nn.Linear(84, 200),
                           torch.nn.ReLU(), torch.nn.Linear(200, 500),
                           torch.nn.ReLU(), torch.nn.Linear(500, 200),
                           torch.nn.ReLU(), torch.nn.Linear(200, 84))

if __name__ == "__main__":
    transformations = transforms.Compose([transforms.ToTensor()])
    dl = DataLoader()
    machine_num = 0  # machine 0 is file 001, machine 1 is 002, and so on
    import para
    raw_data = dl(para.train_data, machine_num)  # load data

    data = raw_data[:, 1:]  # drop the time column
    pp = PreProc(dl.t[1:])
    pp.fit(data)  # fit the preprocessing
    inputs = pp.transform(data)  # preprocessed output
    print(np.any(np.isnan(inputs)))  # check for NaNs
    # custom_data_from_csv = CustomDatasetFromCSV(csv_path='/Users/yangyucheng/Desktop/SCADA/train/201807_1.csv')
    custom_data_from_pre = CustomDatasetFromPre(inputs)
    dataset_loader = torch.utils.data.DataLoader(dataset=custom_data_from_pre,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False)
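
`CustomDatasetFromPre` itself is not shown in this excerpt. A minimal sketch of a dataset that wraps an already-preprocessed array, assuming `inputs` is a 2-D NumPy array (the class body is an assumption, not the original implementation):

import numpy as np
import torch
from torch.utils.data import Dataset

class CustomDatasetFromPre(Dataset):
    """Wraps a preprocessed array so torch can batch it (sketch, assumed)."""

    def __init__(self, inputs):
        self.inputs = np.asarray(inputs)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return torch.from_numpy(self.inputs[idx])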
Example #4
        error = mlp.validateModel(printToScreen=True)
        averageError += error

    averageError = averageError / folds
    li("Average error across all folds: {0}".format(averageError))
    return averageError


if __name__ == "__main__":

    if not args["data"]:
        data = raw_input("Path to dataset: ")
    else:
        data = args["data"]

    loader = DataLoader(data)
    li("Loading Dataset")
    data, meta = loader.load()
    li(meta)
    li("Dataset has Rows: {0}, Columns: {1}".format(len(data), len(meta.names())))

    if args["command"] in ("info"):
        sys.exit(0)

    if not args["labels"]:
        labels = raw_input("Which columns to use as labels? [{0}-{1}]: ".format(1, len(meta.names())))
    else:
        labels = args["labels"]
    setLabels(data, meta, labels)
    if len(meta.labelColumns) < 1:
        raise Exception("Specify atleast 1 label column")
Example #5
from utilities import DataLoader, DataHandler
from model import NBModel
from metrics import Metrics

if __name__ == "__main__":
    # Load data - assumes a directory structure with NEG and POS folders
    #file_dir = "/homes/ija23/nlp/data-tagged"
    file_dir = "/Users/igoradamski/Documents/cambridge/MLMI/nlp/coursework/nlp/data-tagged"

    # documents 0-899 go to the training set, 900-999 to the test set
    pos_train, neg_train, pos_test, neg_test = DataHandler.getTrainTestSet(
        file_dir, 0, 899, 900, 999)

    x_train, y_train = DataHandler.mergeClasses(pos_train, neg_train)
    x_test, y_test = DataHandler.mergeClasses(pos_test, neg_test)

    x_train, _ = DataLoader.splitLines(x_train)
    x_test, _ = DataLoader.splitLines(x_test)

    print("training")
    model = NBModel.trainNB(x_train, y_train, 4)

    print("testing")
    predictions, prod_probs = NBModel.predictNB(x_test,
                                                y_test,
                                                model,
                                                smoothing=0)
    predictions_sm, prod_probs_sm = NBModel.predictNB(x_test,
                                                      y_test,
                                                      model,
                                                      smoothing=0.5)
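
`Metrics` is imported above but unused in this excerpt. A minimal accuracy check on the two prediction runs, assuming `predictions` and `y_test` are equal-length label sequences (this helper is an addition, not part of the original):

def accuracy(preds, truth):
    # fraction of positions where the prediction matches the true label
    return sum(p == t for p, t in zip(preds, truth)) / len(truth)

print("unsmoothed accuracy:", accuracy(predictions, y_test))
print("smoothed accuracy:  ", accuracy(predictions_sm, y_test))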
Example #6
                      weight_decay=weight_decay)

scheduler = optim.lr_scheduler.StepLR(optimizer,
                                      step_size=200,
                                      gamma=0.5,
                                      last_epoch=-1,
                                      verbose=False)
criterion = nn.CrossEntropyLoss()

if __name__ == '__main__':

    # create train loader
    train_dataset = HumanActivityDataset(file='train')

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size_train,
                              shuffle=True,
                              num_workers=0)

    # create test loader
    test_dataset = HumanActivityDataset(file='test')

    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size_test,
                             shuffle=True,
                             num_workers=0)
    test_gen = iter(test_loader)

    # print every 't' steps
    t = 5

    for epoch in range(n_epochs):  # loop over the dataset multiple times
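        # --- sketch: the loop body is cut off in the original excerpt ---
        # `model` is an assumed network, not defined above; criterion,
        # optimizer, scheduler, train_loader and t all come from the excerpt
        for i, (inputs, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            if i % t == 0:  # print every 't' steps
                print('epoch {0} step {1} loss {2:.4f}'.format(
                    epoch, i, loss.item()))
        scheduler.step()  # advance the LR schedule once per epoch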