def prepare_data(self):
    if self.hparams.combined:
        standardize = "./saved_models/scaler_combined.pkl"
    else:
        standardize = "./saved_models/scaler.pkl"
    self.train_data = CSVDataset("../data/expanded/", standardize=standardize)
    self.validation_data = CSVDataset(
        "../data/expanded/",
        csv_features="dev_features.csv",
        csv_labels="dev_labels.csv",
        standardize=standardize,
    )
    self.combined_data = torch.utils.data.ConcatDataset(
        [self.train_data, self.validation_data])
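# The dataloader hooks that would pair with prepare_data above are not shown;
# this is a minimal sketch assuming standard PyTorch Lightning conventions.
# hparams.batch_size and the choice to train on combined_data when
# hparams.combined is set are both assumptions, not from the source.
def train_dataloader(self):
    data = self.combined_data if self.hparams.combined else self.train_data
    return DataLoader(data, batch_size=self.hparams.batch_size, shuffle=True)

def val_dataloader(self):
    return DataLoader(self.validation_data, batch_size=self.hparams.batch_size)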
def data_loaders(dev_per=0.2, batch_size=8):
    csv_dataset = CSVDataset("x.csv", "y.csv")
    # train/dev split
    indices = list(range(len(csv_dataset)))
    split = int(dev_per * len(csv_dataset))
    validation_idx = np.random.choice(indices, size=split, replace=False)
    train_idx = list(set(indices) - set(validation_idx))
    train_sampler = SubsetRandomSampler(train_idx)
    validation_sampler = SubsetRandomSampler(validation_idx)
    train_loader = DataLoader(dataset=csv_dataset, batch_size=batch_size,
                              sampler=train_sampler)
    validation_loader = DataLoader(dataset=csv_dataset,
                                   batch_size=len(validation_idx),
                                   sampler=validation_sampler)
    return train_loader, validation_loader
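# A usage sketch for data_loaders. "x.csv"/"y.csv" are hard-coded inside the
# function, and the split is random, so seeding NumPy first (as here) is an
# assumption about intent, needed only if the dev set must be reproducible.
np.random.seed(0)
train_loader, validation_loader = data_loaders(dev_per=0.2, batch_size=8)
for features, labels in train_loader:
    pass  # one optimization step per mini-batch goes here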
def main():
    args = get_args()
    dataset = CSVDataset(args.csv_file, args.root_dir, args.cols,
                         args.preprocess_fn, args.state, args.next_state,
                         args.reward, args.info, args.done)
    agent = ValueAgentDataset(args.network, dataset, args.batch_size,
                              args.gamma, args.device, args.optimizer,
                              args.lr, args.hidden_dim, args.criterion,
                              args.update)
    summary = SummaryWriter(args.summary_dir)
    # summary.add_hparams()
    num_iter = 0
    for epoch in range(args.num_epochs):
        value_loss, value_mean, value_std = agent.train()
        summary.add_scalar('Loss/Value', value_loss, num_iter)
        summary.add_scalar('Stats/Value Mean', value_mean, num_iter)
        summary.add_scalar('Stats/Value Std', value_std, num_iter)
        num_iter += 1
    summary.close()
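# get_args is not shown; a sketch of the assumed argparse wrapper, limited to
# flags the snippet actually reads, with illustrative defaults only.
import argparse

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--csv_file", required=True)
    parser.add_argument("--summary_dir", default="runs")
    parser.add_argument("--num_epochs", type=int, default=100)
    # ...plus the dataset and agent options used above (root_dir, cols,
    # gamma, lr, hidden_dim, and so on)
    return parser.parse_args()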
        op.append((pv.item(), o1[2].item(), o2[2].item()))
    elapsed = time.time() - start
    n = 0
    for data in loader:
        n = n + 1
        label = data[1].item()
        ac = op.pop(0)
        px = ac[0]
        print("{:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.4f}".format(
            label, px, ac[1], ac[2], abs(px - label)))
    print('Done Testing in', int(1000000 * elapsed / n), "uS per px")


if __name__ == "__main__":
    model = Net().to(device)
    model.load_state_dict(torch.load("model.pt"))
    model.eval()
    df_test = pd.read_csv('pricing-test.csv')
    test_labels = df_test.iloc[:, 0]
    test_data = df_test.iloc[:, 1:]
    test_dataset = CSVDataset(test_data, labels=test_labels, device=device)
    test_loader = DataLoader(test_dataset, batch_size=1)
    test(model, test_loader)
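# CSVDataset is not defined in these pricing scripts; a minimal sketch
# assuming it wraps a feature DataFrame plus a label Series and places
# tensors on the given device. The real class may differ.
import torch
from torch.utils.data import Dataset

class CSVDataset(Dataset):
    def __init__(self, data, labels=None, device="cpu"):
        self.x = torch.tensor(data.values, dtype=torch.float32, device=device)
        self.y = None
        if labels is not None:
            self.y = torch.tensor(labels.values, dtype=torch.float32,
                                  device=device)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        if self.y is None:
            return self.x[idx]
        return self.x[idx], self.y[idx]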
parser.add_argument("--do_train", action="store_true", help="Do training") parser.add_argument("--do_eval", action="store_true", help="Eval test") args = parser.parse_args() args.do_train = True args.do_eval = True train_path = f"data/{args.dataset_name}/train.csv" test_path = f"data/{args.dataset_name}/test.csv" save_path = f"tmp/mlp_{args.dataset_name}" numeric_feature_names = get_numeric_feature_names(train_path) train_data = CSVDataset(train_path, numeric_feature_names=numeric_feature_names, label_name="target", is_reg=args.task == "reg") dev_data = CSVDataset(test_path, numeric_feature_names=numeric_feature_names, label_name="target", is_reg=args.task == "reg") if args.do_train: model = MLP(len(numeric_feature_names), train_data.num_label if args.task == "cls" else 1, task=args.task, hidden_units=[128, 64, 32], device=args.device) trainer = Trainer(train_data, model, dev_data=dev_data,
from mlp import MLP
from trainer import Trainer

# here the test set is simply a 10% sample of the original iris dataset
train_path = "./data/iris/train.csv"
test_path = "./data/iris/test.csv"
save_path = "tmp/mlp_iris"  # where the model's checkpoints are saved

numeric_feature_names = [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
]

# load train data and dev data
train_data = CSVDataset(train_path,
                        numeric_feature_names=numeric_feature_names,
                        label_name="target")
dev_data = CSVDataset(test_path,
                      numeric_feature_names=numeric_feature_names,
                      label_name="target")

# initialize a model
model = MLP(len(numeric_feature_names), train_data.num_label,
            hidden_units=[64, 32, 16], device="cpu")

# initialize a trainer
trainer = Trainer(train_data, model, dev_data=dev_data,
                  eval_on="accuracy", loss_fn="ce",
            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            # propagate the loss backward
            loss.backward()
            # update the model parameters
            optimizer.step()
            running_loss += loss.item()
        scheduler.step()
        print('Epoch', (e + 1), " loss:", running_loss / len(loader))
    print('Done Training')


if __name__ == "__main__":
    model = Net().to(device)
    df_train = pd.read_csv('pricing.csv', dtype=np.float32)
    train_labels = df_train.iloc[:, 0]
    train_data = df_train.iloc[:, 1:]
    train_dataset = CSVDataset(train_data, labels=train_labels, device=device)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    train(model, train_loader)
    torch.save(model.state_dict(), "model.pt")
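# The train loop above assumes optimizer, criterion, scheduler, and device
# already exist; the actual choices are not shown, so these are illustrative
# placeholders (treating pricing as a regression task is an assumption).
import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)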