def LoadData(batch_size=16):
    """Build shuffled train/test DataLoaders over the 28x28 image sets.

    Args:
        batch_size: mini-batch size used by both loaders (default 16).

    Returns:
        (train_loader, test_loader) tuple of DataLoaders.
    """
    tr_labels, tr_images = ReadTrain()
    te_labels, te_images = ReadTest()

    def _to_tensors(images, labels):
        # Reshape the flat image array to channel-first (num, 1, 28, 28)
        # and convert labels to an integer tensor.
        x = torch.from_numpy(images.reshape(-1, 1, 28, 28)).float()
        y = torch.from_numpy(labels.astype(int))
        return x, y

    train_set = dataset.TensorDataset(*_to_tensors(tr_images, tr_labels))
    test_set = dataset.TensorDataset(*_to_tensors(te_images, te_labels))

    loader_kwargs = {'num_workers': 2, 'pin_memory': True}
    train_loader = DataLoader(dataset=train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              **loader_kwargs)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=batch_size,
                             shuffle=True,
                             **loader_kwargs)
    return train_loader, test_loader
def Get_data(self):
    """Load pickled (train, valid, test) splits and wrap them as TensorDatasets.

    Reads ``self.path``, which must contain a pickled 3-tuple; each split is
    itself a tuple of tensors passed straight to ``TensorDataset``.

    Returns:
        (train, valid, test) datasets when the validation split is truthy,
        otherwise (train, test).
    """
    # Use a context manager so the file handle is closed deterministically;
    # the original `pickle.load(open(...))` leaked it until GC.
    with open(self.path, 'rb') as f:
        train, valid, test = pickle.load(f)
    train_dataset = dataset.TensorDataset(*train)
    test_dataset = dataset.TensorDataset(*test)
    if valid:
        valid_dataset = dataset.TensorDataset(*valid)
        return train_dataset, valid_dataset, test_dataset
    return train_dataset, test_dataset
def make_dataset(samples, predicates, labels, max_pred):
    """Add zero-padding and wrap as tensor dataset."""

    def _stack(items, build):
        # Encode each item via `build` (which returns a tensor/mask pair),
        # then stack everything along a new leading batch dimension.
        tensors, masks = [], []
        for item in items:
            t, m = build(item)
            tensors.append(np.expand_dims(t, 0))
            masks.append(np.expand_dims(m, 0))
        return (torch.FloatTensor(np.vstack(tensors)),
                torch.FloatTensor(np.vstack(masks)))

    sample_tensors, sample_masks = _stack(samples, make_sample_tensor_mask)
    L.debug(
        f'Sample tensor shape: {sample_tensors.shape}, mask shape: {sample_masks.shape}'
    )

    predicate_tensors, predicate_masks = _stack(
        predicates, lambda p: make_predicate_tensor_mask(p, max_pred))
    L.debug(
        f'Predicate tensor shape: {predicate_tensors.shape}, mask shape: {predicate_masks.shape}'
    )

    target_tensor = torch.FloatTensor(labels)
    return dataset.TensorDataset(sample_tensors, predicate_tensors,
                                 target_tensor, sample_masks, predicate_masks)
def __init__(self, loader):
    """Initializes a new OneBatchLoaderWrapper instance.

    Args:
        loader: The torch.utils.DataLoader to wrap.
    """
    self._exhausted = False
    # Materialize exactly one batch from the wrapped loader and re-wrap it
    # as a TensorDataset so downstream code sees a fixed mini-dataset.
    first_batch = next(iter(loader))
    self.dataset = dataset.TensorDataset(*first_batch)
def make_dataset(samples, predicates, joins, labels, max_num_joins,
                 max_num_predicates):
    """Add zero-padding and wrap as tensor dataset."""

    def _pad_group(groups, target_rows):
        # Stack each variable-length group, zero-pad it to `target_rows`
        # rows, and build a per-row validity mask (1 = real row, 0 = pad).
        padded, masks = [], []
        for group in groups:
            mat = np.vstack(group)
            pad_rows = target_rows - mat.shape[0]
            mask = np.ones_like(mat).mean(1, keepdims=True)
            mat = np.pad(mat, ((0, pad_rows), (0, 0)), 'constant')
            mask = np.pad(mask, ((0, pad_rows), (0, 0)), 'constant')
            padded.append(np.expand_dims(mat, 0))
            masks.append(np.expand_dims(mask, 0))
        return (torch.FloatTensor(np.vstack(padded)),
                torch.FloatTensor(np.vstack(masks)))

    # One table row per joined table, and #tables <= #joins + 1.
    sample_tensors, sample_masks = _pad_group(samples, max_num_joins + 1)
    predicate_tensors, predicate_masks = _pad_group(predicates,
                                                    max_num_predicates)
    join_tensors, join_masks = _pad_group(joins, max_num_joins)

    target_tensor = torch.FloatTensor(labels)
    return dataset.TensorDataset(sample_tensors, predicate_tensors,
                                 join_tensors, target_tensor, sample_masks,
                                 predicate_masks, join_masks)
def gen_dataloader(data, word_dict, arg):
    """Vectorize `data` and wrap the encoded features in a shuffled DataLoader.

    The per-example scalar features are reshaped to column vectors and
    concatenated after the token matrix `x` into one int64 feature block.
    """
    x, y, e1, e2, e1d2, e2d1, zd, d1, d2 = vectorize(data, word_dict, arg.N)

    labels = torch.LongTensor(np.array(y).astype(np.int64))

    def _col(v):
        # Reshape a flat per-example feature into an (N, 1) column.
        return np.array(v).reshape(-1, 1)

    feature_block = np.concatenate(
        (x, _col(e1), _col(e1d2), _col(e2), _col(e2d1), _col(zd),
         np.array(d1), np.array(d2)), 1)
    features = torch.from_numpy(feature_block.astype(np.int64))

    ds = dataset.TensorDataset(features, labels)
    return DataLoader(ds, arg.BATCH_SIZE, True)
def __init__(self, loader):
    """Initializes a new OneBatchLoaderWrapper instance.

    Args:
        loader: The torch.utils.DataLoader to wrap. We assume the loader
            returns tuples of batches where each item in the tuple has
            batch_size as the first dimension. We do not impose a
            restriction on the size of the tuple. E.g., (X), (X, Y),
            (X, Y, Z), ... are all valid tuples as long as
            X.shape[0] == Y.shape[0] == Z.shape[0] == batch_size.
    """
    self._exhausted = False
    # Keep only the first example of the first batch; the `[:1]` slice
    # preserves the leading batch dimension (size 1) for every item.
    first_batch = next(iter(loader))
    single_example = [item[:1] for item in first_batch]
    self.dataset = dataset.TensorDataset(*single_example)
def __extract_feature(train=True, force_new=False):
    """Extract pooled BERT features, caching them as .npy files under ROOT/tmp.

    Args:
        train: return the training split when True, otherwise the validation
            split (selected via the saved boolean train/val mask).
        force_new: recompute features through BERT even if cache files exist.

    Returns:
        (x, y): FloatTensor of pooled features and LongTensor of labels for
        the requested split.
    """
    path_x = os.path.join(ROOT, 'tmp', 'fea_x.npy')
    path_y = os.path.join(ROOT, 'tmp', 'fea_y.npy')
    path_type = os.path.join(ROOT, 'tmp', 'trainval_split_.npy')
    if not force_new and os.path.exists(path_x) and os.path.exists(
            path_y) and os.path.exists(path_type):
        # Fast path: all three cache files exist — just slice out the split.
        x = np.load(path_x)
        y = np.load(path_y)
        trainval_split = np.load(path_type)
        flag = trainval_split if train else ~trainval_split
        x = torch.FloatTensor(x[flag])
        y = torch.LongTensor(y[flag])
    else:
        x, y, mask, trainval_split = __read()
        trainval = torch.LongTensor(trainval_split.astype(np.int32))
        mask = torch.LongTensor(mask)
        x = torch.LongTensor(x)
        y = torch.LongTensor(y)
        temp = dataset.TensorDataset(x, y, mask, trainval)
        bert_model = BertModel.from_pretrained('bert-base-chinese').cuda()
        bert_model = torch.nn.DataParallel(bert_model)
        bert_model.eval()
        Xs = []
        ys = []
        types = []
        for x, y, m, t in tqdm(
                DataLoader(temp, batch_size=200, shuffle=False,
                           num_workers=2)):
            with autograd.no_grad():
                # NOTE(review): `output_all_encoded_layers` is the
                # pytorch_pretrained_bert API; the `transformers` package
                # dropped it — confirm which BERT package is pinned.
                _, pooled = bert_model(x.cuda(),
                                       attention_mask=m.cuda(),
                                       output_all_encoded_layers=False)
            Xs.append(pooled.cpu())
            ys.append(y)
            types.append(t)
        x = torch.cat(Xs, dim=0).numpy()
        y = torch.cat(ys, dim=0).numpy()
        # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
        # drop-in replacement for astype.
        types = torch.cat(types, dim=0).numpy().astype(bool)
        np.save(path_x, x)
        np.save(path_y, y)
        np.save(path_type, types)
        flag = types if train else ~types
        x = torch.FloatTensor(x[flag])
        y = torch.LongTensor(y[flag])
    return x, y
def get_loader(train=True, batch_size=200, force_new=False):
    """Return a shuffled DataLoader over the cached BERT feature dataset.

    Args:
        train: select the training split when True, else the validation split.
        batch_size: mini-batch size for the returned loader.
        force_new: passed through to feature extraction to bypass .npy caches
            (only consulted on a cache miss here).
    """
    key = 'train' if train else 'val'
    torch_dataset = __cache.get(key)
    # Check `is not None` rather than truthiness: a TensorDataset of length 0
    # is falsy, which would silently re-extract features on every call.
    if torch_dataset is None:
        torch_dataset = dataset.TensorDataset(
            *__extract_feature(train, force_new=force_new))
        __cache[key] = torch_dataset
    return DataLoader(torch_dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=2)
def make_dataset(samples, predicates, labels, max_num_predicates):
    """Add zero-padding and wrap as tensor dataset."""

    def _pad_stack(groups, target_rows, strict=False):
        # Stack each group, zero-pad rows up to `target_rows`, and build a
        # per-row validity mask (1 = real row, 0 = padding).
        tensors, masks = [], []
        for group in groups:
            mat = np.vstack(group)
            pad_rows = target_rows - mat.shape[0]
            if strict:
                # wangxy: #joins + 1 >= #tables; for a single table the join
                # count is 0, so every sample must already be exactly one row.
                assert pad_rows == 0, pad_rows
            mask = np.ones_like(mat).mean(1, keepdims=True)
            mat = np.pad(mat, ((0, pad_rows), (0, 0)), 'constant')
            mask = np.pad(mask, ((0, pad_rows), (0, 0)), 'constant')
            tensors.append(np.expand_dims(mat, 0))
            masks.append(np.expand_dims(mask, 0))
        return (torch.FloatTensor(np.vstack(tensors)),
                torch.FloatTensor(np.vstack(masks)))

    sample_tensors, sample_masks = _pad_stack(samples, 0 + 1, strict=True)
    predicate_tensors, predicate_masks = _pad_stack(predicates,
                                                    max_num_predicates)

    target_tensor = torch.FloatTensor(labels)
    return dataset.TensorDataset(sample_tensors, predicate_tensors,
                                 target_tensor, sample_masks, predicate_masks)
def dataset(build=False):
    """Return (train, val) splits of the chaotic-RNN sequence dataset.

    Loads the cached splits from DATASET_FILE unless `build` is True or no
    cache exists; otherwise generates the data by running random start
    vectors through a deliberately chaotic recurrent model, then caches it.
    """
    if not build and os.path.exists(DATASET_FILE):
        return torch.load(DATASET_FILE)

    # Take a network, initialize it to be super chaotic, then just run some
    # random data through it.
    os.makedirs('datasets', exist_ok=True)
    mdl = model(REFERENCE_SIZE)

    # Make the recurrent weights chaotic: a large random matrix plus the
    # identity, assigned directly to the hidden-to-hidden weight data.
    hh = mdl.rnn.weight_hh_l0
    scale = 16 / math.sqrt(REFERENCE_SIZE)
    hh.data = scale * torch.randn(hh.shape) + torch.eye(REFERENCE_SIZE)

    starts = torch.rand((NUM_SEQUENCES, STREAM_SIZE))
    with torch.no_grad():
        seqs = mdl(starts)

    full = dset.TensorDataset(starts, seqs)
    tr, vl = iutils.split(full, 0.1)
    torch.save((tr, vl), DATASET_FILE)
    return tr, vl
#read test dataset # If you just trained a model, save as a .pth file and # then run this test.py immediately, the test dataset # you are using is exactly all data that haven't been used in training # if you need a new random test dataset, please Run datasplit.py # Images_test = np.uint8(np.load('Images_test.npy')) Labels_test = np.load('Labels_test.npy') print('Finish loading Data and Label') print("Data preprocessing") #transform the data tensor_Images_test = torch.stack([data_transform(Images_test[i]) for i in range(len(Images_test))]) tensor_Labels_test = torch.stack([torch.Tensor(np.array([Labels_test[i]])) for i in range(len(Labels_test))]) #create dataset test_dataset = Dataset.TensorDataset(tensor_Images_test,tensor_Labels_test) #creat dataloader test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = 32) #load model weights net = models.resnet50(num_classes=5) model_weight_path = "./SGD100.pth" net.load_state_dict(torch.load(model_weight_path, map_location=device)) print('Finish loading Model') #predict class print('Start Predicting') net.eval() all_preds = torch.tensor([]) with torch.no_grad():
# Select the first GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))
# Turn Images_train and Images_validation into uint8 so the image transform
# receives the dtype it expects.
Images_train = np.uint8(Images_train)
Images_validation = np.uint8(Images_validation)
# Transform to tensor data, including normalization to (-1, 1); each label is
# wrapped as a one-element tensor so the stacked labels have shape (N, 1).
tensor_Images_train = torch.stack([data_transform(Images_train[i]) for i in range(len(Images_train))])
tensor_Labels_train = torch.stack([torch.Tensor(np.array([Labels_train[i]])) for i in range(len(Labels_train))])
tensor_Images_validation = torch.stack([data_transform(Images_validation[i]) for i in range(len(Images_validation))])
tensor_Labels_validation = torch.stack([torch.Tensor(np.array([Labels_validation[i]])) for i in range(len(Labels_validation))])
# Create the datasets.
batch_size = 32
train_dataset = Dataset.TensorDataset(tensor_Images_train,tensor_Labels_train)
validation_dataset = Dataset.TensorDataset(tensor_Images_validation,tensor_Labels_validation)
val_num = len(validation_dataset)
# Create the dataloaders (no shuffling, single worker process).
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
validatation_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
### --------------------------------------- ###
### Model Configuration ###
## Setting 1: Transfer Learning
# Load ImageNet-pretrained ResNet-50 weights; strict=False tolerates the
# mismatched classifier head, which is then replaced with a 5-class layer.
net = models.resnet50()
model_weight_path = "./resnet50-pre.pth"
net.load_state_dict(torch.load(model_weight_path), strict=False)
in_channel = net.fc.in_features
net.fc = nn.Linear(in_channel, 5)
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.datasets import imdb
import tensorflow.keras as keras
from pytorch_nndct.apis import torch_quantizer
#torch.set_default_dtype(torch.double)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
top_words = 5000  # vocab size
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
#(X_train, y_train), (X_test, y_test) = imdb.load_data(path="./imdb.npz", num_words=top_words)
# Pad/truncate every review to a fixed length so the batches are uniform.
max_review_length = 500
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
test_data = dataset.TensorDataset(torch.LongTensor(X_test), torch.Tensor(y_test))
# Optionally restrict evaluation to the first `subset_len` test examples
# (useful for quick quantization-calibration runs).
if args.subset_len:
    subset_len = args.subset_len
    assert subset_len <= len(test_data)
    test_data = torch.utils.data.Subset(test_data, list(range(subset_len)))
#train_loader = DataLoader(train_data, batch_size=50, shuffle=True)
test_loader = dataloader.DataLoader(test_data, batch_size=50, shuffle=False)


class Model(nn.Module):
    # NOTE(review): the class definition continues beyond this chunk;
    # presumably an embedding/recurrent sentiment model over IMDB reviews.
    def __init__(self, max_words, emb_size, hid_size):
        super(Model, self).__init__()
        self.max_words = max_words  # vocabulary size for the embedding
        self.emb_size = emb_size    # embedding dimension
        self.hid_size = hid_size    # hidden-state dimension
def make_dataset(samples, predicates, joins, joins_v1, predicates_uri, labels,
                 max_num_joins, max_num_v1_joins, max_num_predicates,
                 max_num_predicates_uris):
    """
    Add zero-padding and wrap as tensor dataset.
    :param samples:
    :param predicates:
    :param joins: unused here; the padded join features come from `joins_v1`
    :param joins_v1: New version of joins in the way zeros for predFrom +
        zeros from predTo + zeros from type join
    :param predicates_uri:
    :param labels:
    :param max_num_joins: unused; superseded by `max_num_v1_joins`
    :param max_num_v1_joins:
    :param max_num_predicates:
    :param max_num_predicates_uris:
    :return: TensorDataset of (samples, predicates, joins, predicate URIs,
        targets) followed by the four matching masks
    """

    def _pad_stack(groups, target_rows):
        # Stack each variable-length group, zero-pad it to `target_rows`
        # rows, and build a per-row validity mask (1 = real row, 0 = pad).
        tensors, masks = [], []
        for group in groups:
            mat = np.vstack(group)
            pad_rows = target_rows - mat.shape[0]
            mask = np.ones_like(mat).mean(1, keepdims=True)
            mat = np.pad(mat, ((0, pad_rows), (0, 0)), 'constant')
            mask = np.pad(mask, ((0, pad_rows), (0, 0)), 'constant')
            tensors.append(np.expand_dims(mat, 0))
            masks.append(np.expand_dims(mask, 0))
        return (torch.FloatTensor(np.vstack(tensors)),
                torch.FloatTensor(np.vstack(masks)))

    # Samples pad against the v1 join count: #tables <= #v1-joins + 1.
    sample_tensors, sample_masks = _pad_stack(samples, max_num_v1_joins + 1)
    predicate_tensors, predicate_masks = _pad_stack(predicates,
                                                    max_num_predicates)
    predicate_uri_tensors, predicate_uri_masks = _pad_stack(
        predicates_uri, max_num_predicates_uris)
    join_tensors, join_masks = _pad_stack(joins_v1, max_num_v1_joins)

    target_tensor = torch.FloatTensor(labels)
    return dataset.TensorDataset(sample_tensors, predicate_tensors,
                                 join_tensors, predicate_uri_tensors,
                                 target_tensor, sample_masks, predicate_masks,
                                 join_masks, predicate_uri_masks)