def load_data(self, sample_percent, batch_size, data_type="train"):
    self.batch_size = batch_size
    self.sample_percent = sample_percent
    if data_type == "train":
        cut_num = 20000
    else:
        cut_num = 5000

    # Loading data
    loc = np.load(self.dataset_path + '/loc_' + data_type + self.suffix + '.npy', allow_pickle=True)[:cut_num]
    vel = np.load(self.dataset_path + '/vel_' + data_type + self.suffix + '.npy', allow_pickle=True)[:cut_num]
    edges = np.load(self.dataset_path + '/edges_' + data_type + self.suffix + '.npy', allow_pickle=True)[:cut_num]  # [500, 5, 5]
    times = np.load(self.dataset_path + '/times_' + data_type + self.suffix + '.npy', allow_pickle=True)[:cut_num]  # [500, 5]

    self.num_graph = loc.shape[0]
    self.num_atoms = loc.shape[1]
    self.feature = loc[0][0][0].shape[0] + vel[0][0][0].shape[0]
    print("number of graphs in " + data_type + " is %d" % self.num_graph)
    print("number of atoms in " + data_type + " is %d" % self.num_atoms)

    if self.suffix == "_springs5" or self.suffix == "_charged5":
        # Normalize features to [-1, 1], shared across the test and train datasets
        if self.max_loc is None:
            loc, max_loc, min_loc = self.normalize_features(loc, self.num_atoms)  # [num_sims, num_atoms, (timestamps, 2)]
            vel, max_vel, min_vel = self.normalize_features(vel, self.num_atoms)
            self.max_loc = max_loc
            self.min_loc = min_loc
            self.max_vel = max_vel
            self.min_vel = min_vel
        else:
            loc = (loc - self.min_loc) * 2 / (self.max_loc - self.min_loc) - 1
            vel = (vel - self.min_vel) * 2 / (self.max_vel - self.min_vel) - 1
    else:
        self.timelength = 49

    # Split data w.r.t. interp and extrap, and also normalize times
    if self.mode == "interp":
        loc_en, vel_en, times_en = self.interp_extrap(loc, vel, times, self.mode, data_type)
        loc_de = loc_en
        vel_de = vel_en
        times_de = times_en
    elif self.mode == "extrap":
        loc_en, vel_en, times_en, loc_de, vel_de, times_de = self.interp_extrap(loc, vel, times, self.mode, data_type)

    # Encoder dataloader
    series_list_observed, loc_observed, vel_observed, times_observed = self.split_data(loc_en, vel_en, times_en)
    if self.mode == "interp":
        time_begin = 0
    else:
        time_begin = 1
    encoder_data_loader, graph_data_loader = self.transfer_data(
        loc_observed, vel_observed, edges, times_observed, time_begin=time_begin)

    # Graph dataloader -- using NRI
    edges = np.reshape(edges, [-1, self.num_atoms ** 2])
    edges = np.array((edges + 1) / 2, dtype=np.int64)
    edges = torch.LongTensor(edges)
    # Exclude self edges: keep only the off-diagonal entries of the adjacency matrix
    off_diag_idx = np.ravel_multi_index(
        np.where(np.ones((self.num_atoms, self.num_atoms)) - np.eye(self.num_atoms)),
        [self.num_atoms, self.num_atoms])
    edges = edges[:, off_diag_idx]
    graph_data_loader = Loader(edges, batch_size=self.batch_size)

    # Decoder dataloader
    if self.mode == "interp":
        series_list_de = series_list_observed
    elif self.mode == "extrap":
        series_list_de = self.decoder_data(loc_de, vel_de, times_de)

    decoder_data_loader = Loader(
        series_list_de, batch_size=self.batch_size * self.num_atoms, shuffle=False,
        collate_fn=lambda batch: self.variable_time_collate_fn_activity(batch))  # num_graph * num_ball [tt, vals, masks]

    num_batch = len(decoder_data_loader)
    encoder_data_loader = utils.inf_generator(encoder_data_loader)
    graph_data_loader = utils.inf_generator(graph_data_loader)
    decoder_data_loader = utils.inf_generator(decoder_data_loader)

    return encoder_data_loader, decoder_data_loader, graph_data_loader, num_batch
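# The `normalize_features` helper invoked above (via self.normalize_features)
# is not shown in this section. A minimal free-function sketch, assuming the
# same affine [-1, 1] rescaling as the else-branch above and ragged object
# arrays of shape [num_graphs, num_atoms] whose entries are (timestamps, dim)
# float arrays:
def normalize_features(inputs, num_atoms):
    # Global extrema over every graph and atom, returned so the test split
    # can reuse the training extrema.
    max_val = max(inputs[i][j].max()
                  for i in range(inputs.shape[0]) for j in range(num_atoms))
    min_val = min(inputs[i][j].min()
                  for i in range(inputs.shape[0]) for j in range(num_atoms))
    for i in range(inputs.shape[0]):
        for j in range(num_atoms):
            # Same map as the else-branch: x -> 2 * (x - min) / (max - min) - 1
            inputs[i][j] = (inputs[i][j] - min_val) * 2 / (max_val - min_val) - 1
    return inputs, max_val, min_val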
def parse_datasets(args, device, test_batch_size=50):

    def basic_collate_fn(batch, time_steps, args=args, device=device, data_type="train"):
        batch = torch.stack(batch)
        data_dict = {
            "data": batch,
            "time_steps": time_steps}
        data_dict = utils.split_and_subsample_batch(data_dict, args, data_type=data_type)
        return data_dict

    dataset_name = args.dataset
    n_total_tp = args.timepoints + args.extrap
    max_t_extrap = args.max_t / args.timepoints * n_total_tp

    ##################################################################
    # MuJoCo dataset
    if dataset_name == "hopper":
        dataset_obj = HopperPhysics(root='data', download=True, generate=False, device=device)
        dataset = dataset_obj.get_dataset()[:args.n]
        dataset = dataset.to(device)

        n_tp_data = dataset[:].shape[1]

        # Time steps that are used later on for extrapolation
        time_steps = torch.arange(start=0, end=n_tp_data, step=1).float().to(device)
        time_steps = time_steps / len(time_steps)

        dataset = dataset.to(device)
        time_steps = time_steps.to(device)

        if not args.extrap:
            # Creating dataset for interpolation:
            # sample time points from different parts of the timeline,
            # so that the model learns from different parts of the hopper trajectory
            n_traj = len(dataset)
            n_tp_data = dataset.shape[1]
            n_reduced_tp = args.timepoints

            start_ind = np.random.randint(0, high=n_tp_data - n_reduced_tp + 1, size=n_traj)
            end_ind = start_ind + n_reduced_tp
            sliced = []
            for i in range(n_traj):
                sliced.append(dataset[i, start_ind[i]:end_ind[i], :])
            dataset = torch.stack(sliced).to(device)
            time_steps = time_steps[:n_reduced_tp]

        # Split into train and test by the time sequences
        train_y, test_y = utils.split_train_test(dataset, train_fraq=0.8)

        n_samples = len(dataset)
        input_dim = dataset.size(-1)

        batch_size = min(args.batch_size, args.n)
        train_dataloader = DataLoader(train_y, batch_size=batch_size, shuffle=False,
                                      collate_fn=lambda batch: basic_collate_fn(batch, time_steps, data_type="train"))
        test_dataloader = DataLoader(test_y, batch_size=n_samples, shuffle=False,
                                     collate_fn=lambda batch: basic_collate_fn(batch, time_steps, data_type="test"))

        data_objects = {"dataset_obj": dataset_obj,
                        "train_dataloader": utils.inf_generator(train_dataloader),
                        "test_dataloader": utils.inf_generator(test_dataloader),
                        "input_dim": input_dim,
                        "n_train_batches": len(train_dataloader),
                        "n_test_batches": len(test_dataloader)}
        return data_objects

    ##################################################################
    # Physionet dataset
    if dataset_name == "physionet":
        train_dataset_obj = PhysioNet('data/physionet', train=True,
                                      quantization=args.quantization,
                                      download=True, n_samples=min(10000, args.n),
                                      device=device)
        # Use custom collate_fn to combine samples with arbitrary time observations.
        # Returns the dataset along with mask and time steps
        test_dataset_obj = PhysioNet('data/physionet', train=False,
                                     quantization=args.quantization,
                                     download=True, n_samples=min(10000, args.n),
                                     device=device)

        # Combine and shuffle samples from physionet Train and physionet Test
        total_dataset = train_dataset_obj[:len(train_dataset_obj)]

        if not args.classif:
            # Concatenate samples from the original Train and Test sets.
            # Only 'training' physionet samples have labels, so for the
            # classification task the physionet 'test' samples are not needed.
            total_dataset = total_dataset + test_dataset_obj[:len(test_dataset_obj)]

        # Shuffle and split
        train_data, test_data = model_selection.train_test_split(total_dataset, train_size=0.8,
                                                                 random_state=42, shuffle=True)

        record_id, tt, vals, mask, labels = train_data[0]

        n_samples = len(total_dataset)
        input_dim = vals.size(-1)

        batch_size = min(min(len(train_dataset_obj), args.batch_size), args.n)
        data_min, data_max = get_data_min_max(total_dataset)

        train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=False,
                                      collate_fn=lambda batch: physionet_collate_fn(
                                          batch, args, device, data_type="train",
                                          data_min=data_min, data_max=data_max))
        test_dataloader = DataLoader(test_data, batch_size=100, shuffle=False,
                                     collate_fn=lambda batch: physionet_collate_fn(
                                         batch, args, device, data_type="test",
                                         data_min=data_min, data_max=data_max))

        attr_names = train_dataset_obj.params
        data_objects = {"dataset_obj": train_dataset_obj,
                        "train_dataloader": utils.inf_generator(train_dataloader),
                        "test_dataloader": utils.inf_generator(test_dataloader),
                        "input_dim": input_dim,
                        "n_train_batches": len(train_dataloader),
                        "n_test_batches": len(test_dataloader),
                        "attr": attr_names,  # optional
                        "classif_per_tp": False,  # optional
                        "n_labels": 1}  # optional
        return data_objects

    ##################################################################
    # Human activity dataset
    if dataset_name == "activity":
        n_samples = min(10000, args.n)
        dataset_obj = PersonActivity('data/PersonActivity',
                                     download=True, n_samples=n_samples, device=device)
        print(dataset_obj)
        # Use custom collate_fn to combine samples with arbitrary time observations.
        # Returns the dataset along with mask and time steps

        # Shuffle and split
        train_data, test_data = model_selection.train_test_split(dataset_obj, train_size=0.8,
                                                                 random_state=42, shuffle=True)

        train_data = [train_data[i] for i in np.random.choice(len(train_data), len(train_data))]
        test_data = [test_data[i] for i in np.random.choice(len(test_data), len(test_data))]

        record_id, tt, vals, mask, labels = train_data[0]
        input_dim = vals.size(-1)

        batch_size = min(min(len(dataset_obj), args.batch_size), args.n)
        train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=False,
                                      collate_fn=lambda batch: activity_collate_fn(batch, args, device, data_type="train"))
        test_dataloader = DataLoader(test_data, batch_size=n_samples, shuffle=False,
                                     collate_fn=lambda batch: activity_collate_fn(batch, args, device, data_type="test"))

        data_objects = {"dataset_obj": dataset_obj,
                        "train_dataloader": utils.inf_generator(train_dataloader),
                        "test_dataloader": utils.inf_generator(test_dataloader),
                        "input_dim": input_dim,
                        "n_train_batches": len(train_dataloader),
                        "n_test_batches": len(test_dataloader),
                        "classif_per_tp": True,  # optional
                        "n_labels": labels.size(-1)}
        return data_objects

    ##################################################################
    # MELD dataset
    if dataset_name == "meld":
        n_samples = min(10000, args.n)
        dataset_obj = MELD('data', download=True, n_samples=n_samples, device=device)
        print(dataset_obj)
        # Use custom collate_fn to combine samples with arbitrary time observations.
        # Returns the dataset along with mask and time steps

        # Shuffle and split
        # train_data, test_data = model_selection.train_test_split(dataset_obj, train_size=0.8,
        #                                                          random_state=42, shuffle=True)
        train_data = dataset_obj.train_data
        # dev_data = dataset_obj.dev_data
        test_data = dataset_obj.test_data
        train_data = [train_data[i] for i in np.random.choice(len(train_data), len(train_data), replace=False)]
        # dev_data = [dev_data[i] for i in np.random.choice(len(dev_data), len(dev_data))]
        test_data = [test_data[i] for i in np.random.choice(len(test_data), len(test_data), replace=False)]

        vals, _, _, _ = train_data[0]
        input_dim = vals.size(-1)

        batch_size = min(min(len(dataset_obj), args.batch_size), args.n)
        train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=False,
                                      collate_fn=lambda batch: meld_collate_fn(batch, args, device, data_type="train"))
        # dev_dataloader = DataLoader(dev_data, batch_size=n_samples, shuffle=False,
        #                             collate_fn=lambda batch: meld_collate_fn(batch, args, device, data_type="test"))
        test_dataloader = DataLoader(test_data, batch_size=n_samples, shuffle=False,
                                     collate_fn=lambda batch: meld_collate_fn(batch, args, device, data_type="test"))

        data_objects = {"dataset_obj": dataset_obj,
                        "train_dataloader": utils.inf_generator(train_dataloader),
                        # "dev_dataloader": utils.inf_generator(dev_dataloader),
                        "test_dataloader": utils.inf_generator(test_dataloader),
                        "input_dim": input_dim + 1,
                        "n_train_batches": len(train_dataloader),
                        # "n_dev_batches": len(dev_dataloader),
                        "n_test_batches": len(test_dataloader),
                        "classif_per_tp": True,
                        "n_labels": 7}
        return data_objects

    ########### 1d datasets ###########

    # Sampling args.timepoints time points in the interval [0, args.max_t].
    # Sample points for both the training sequence and extrapolation (test)
    distribution = uniform.Uniform(torch.Tensor([0.0]), torch.Tensor([max_t_extrap]))
    time_steps_extrap = distribution.sample(torch.Size([n_total_tp - 1]))[:, 0]
    time_steps_extrap = torch.cat((torch.Tensor([0.0]), time_steps_extrap))
    time_steps_extrap = torch.sort(time_steps_extrap)[0]

    dataset_obj = None
    ##################################################################
    # Sample a periodic function
    if dataset_name == "periodic":
        dataset_obj = Periodic_1d(init_freq=None, init_amplitude=1.,
                                  final_amplitude=1., final_freq=None, z0=1.)

    ##################################################################

    if dataset_obj is None:
        raise Exception("Unknown dataset: {}".format(dataset_name))

    dataset = dataset_obj.sample_traj(time_steps_extrap, n_samples=args.n,
                                      noise_weight=args.noise_weight)

    # Process small datasets
    dataset = dataset.to(device)
    time_steps_extrap = time_steps_extrap.to(device)

    train_y, test_y = utils.split_train_test(dataset, train_fraq=0.8)

    n_samples = len(dataset)
    input_dim = dataset.size(-1)

    batch_size = min(args.batch_size, args.n)
    train_dataloader = DataLoader(train_y, batch_size=batch_size, shuffle=False,
                                  collate_fn=lambda batch: basic_collate_fn(batch, time_steps_extrap, data_type="train"))
    test_dataloader = DataLoader(test_y, batch_size=args.n, shuffle=False,
                                 collate_fn=lambda batch: basic_collate_fn(batch, time_steps_extrap, data_type="test"))

    data_objects = {  # "dataset_obj": dataset_obj,
        "train_dataloader": utils.inf_generator(train_dataloader),
        "test_dataloader": utils.inf_generator(test_dataloader),
        "input_dim": input_dim,
        "n_train_batches": len(train_dataloader),
        "n_test_batches": len(test_dataloader)}
    return data_objects
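# Every dataloader returned above is wrapped in utils.inf_generator so the
# training loop can keep calling next(...) without handling epoch boundaries.
# A minimal sketch of such a wrapper (the repo's own version may differ):
def inf_generator(iterable):
    # Cycle through the iterable forever, transparently restarting the
    # underlying iterator whenever it is exhausted.
    iterator = iter(iterable)
    while True:
        try:
            yield next(iterator)
        except StopIteration:
            iterator = iter(iterable)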
def parse_datasets(args, device):

    def basic_collate_fn(batch, time_steps, args=args, device=device, data_type="train"):
        batch = torch.stack(batch)
        data_dict = {"data": batch, "time_steps": time_steps}
        data_dict = utils.split_and_subsample_batch(data_dict, args, data_type=data_type)
        return data_dict

    dataset_name = args.dataset
    n_total_tp = args.timepoints + args.extrap
    max_t_extrap = args.max_t / args.timepoints * n_total_tp

    ##################################################################
    # MuJoCo dataset
    if dataset_name == "hopper":
        dataset_obj = HopperPhysics(root='data', download=True, generate=False, device=device)
        dataset = dataset_obj.get_dataset()[:args.n]
        dataset = dataset.to(device)

        n_tp_data = dataset[:].shape[1]

        # Time steps that are used later on for extrapolation
        time_steps = torch.arange(start=0, end=n_tp_data, step=1).float().to(device)
        time_steps = time_steps / len(time_steps)

        dataset = dataset.to(device)
        time_steps = time_steps.to(device)

        if not args.extrap:
            # Creating dataset for interpolation:
            # sample time points from different parts of the timeline,
            # so that the model learns from different parts of the hopper trajectory
            n_traj = len(dataset)
            n_tp_data = dataset.shape[1]
            n_reduced_tp = args.timepoints

            start_ind = np.random.randint(0, high=n_tp_data - n_reduced_tp + 1, size=n_traj)
            end_ind = start_ind + n_reduced_tp
            sliced = []
            for i in range(n_traj):
                sliced.append(dataset[i, start_ind[i]:end_ind[i], :])
            dataset = torch.stack(sliced).to(device)
            time_steps = time_steps[:n_reduced_tp]

        # Split into train and test by the time sequences
        train_y, test_y = utils.split_train_test(dataset, train_fraq=0.8)

        n_samples = len(dataset)
        input_dim = dataset.size(-1)

        if args.mcar or args.mnar:
            collate_fn_train = lambda batch: data_ampute_batch_collate(
                batch, time_steps, args, device, data_type="train")
            collate_fn_test = lambda batch: data_ampute_batch_collate(
                batch, time_steps, args, device, data_type="test")
        else:
            collate_fn_train = lambda batch: basic_collate_fn(
                batch, time_steps, data_type="train")
            collate_fn_test = lambda batch: basic_collate_fn(
                batch, time_steps, data_type="test")

        batch_size = min(args.batch_size, args.n)
        train_dataloader = DataLoader(train_y, batch_size=batch_size,
                                      shuffle=False, collate_fn=collate_fn_train)
        test_dataloader = DataLoader(test_y, batch_size=n_samples,
                                     shuffle=False, collate_fn=collate_fn_test)

        data_objects = {
            "dataset_obj": dataset_obj,
            "train_dataloader": utils.inf_generator(train_dataloader),
            "test_dataloader": utils.inf_generator(test_dataloader),
            "input_dim": input_dim,
            "n_train_batches": len(train_dataloader),
            "n_test_batches": len(test_dataloader)
        }
        return data_objects

    ##################################################################
    # Physionet dataset
    if dataset_name == "physionet":
        train_dataset_obj = PhysioNet('data/physionet', train=True,
                                      quantization=args.quantization,
                                      download=True,
                                      n_samples=min(10000, args.n),
                                      device=device)
        # Use custom collate_fn to combine samples with arbitrary time observations.
        # Returns the dataset along with mask and time steps
        test_dataset_obj = PhysioNet('data/physionet', train=False,
                                     quantization=args.quantization,
                                     download=True,
                                     n_samples=min(10000, args.n),
                                     device=device)

        # Combine and shuffle samples from physionet Train and physionet Test
        total_dataset = train_dataset_obj[:len(train_dataset_obj)]

        if not args.classif:
            # Concatenate samples from the original Train and Test sets.
            # Only 'training' physionet samples have labels, so for the
            # classification task the physionet 'test' samples are not needed.
            total_dataset = total_dataset + test_dataset_obj[:len(test_dataset_obj)]

        # Shuffle and split
        train_data, test_data = model_selection.train_test_split(
            total_dataset, train_size=0.8, random_state=42, shuffle=True)

        record_id, tt, vals, mask, labels = train_data[0]

        n_samples = len(total_dataset)
        input_dim = vals.size(-1)

        batch_size = min(min(len(train_dataset_obj), args.batch_size), args.n)
        data_min, data_max = get_data_min_max(total_dataset)

        train_dataloader = DataLoader(
            train_data, batch_size=batch_size, shuffle=False,
            collate_fn=lambda batch: variable_time_collate_fn(
                batch, args, device, data_type="train",
                data_min=data_min, data_max=data_max))
        test_dataloader = DataLoader(
            test_data, batch_size=n_samples, shuffle=False,
            collate_fn=lambda batch: variable_time_collate_fn(
                batch, args, device, data_type="test",
                data_min=data_min, data_max=data_max))

        attr_names = train_dataset_obj.params
        data_objects = {
            "dataset_obj": train_dataset_obj,
            "train_dataloader": utils.inf_generator(train_dataloader),
            "test_dataloader": utils.inf_generator(test_dataloader),
            "input_dim": input_dim,
            "n_train_batches": len(train_dataloader),
            "n_test_batches": len(test_dataloader),
            "attr": attr_names,  # optional
            "classif_per_tp": False,  # optional
            "n_labels": 1  # optional
        }
        return data_objects

    ##################################################################
    # Human activity dataset
    if dataset_name == "activity":
        n_samples = min(10000, args.n)
        dataset_obj = PersonActivity('data/PersonActivity', download=True,
                                     n_samples=n_samples, device=device)
        print(dataset_obj)
        # Use custom collate_fn to combine samples with arbitrary time observations.
        # Returns the dataset along with mask and time steps

        # Shuffle and split
        train_data, test_data = model_selection.train_test_split(
            dataset_obj, train_size=0.8, random_state=42, shuffle=True)

        train_data = [
            train_data[i]
            for i in np.random.choice(len(train_data), len(train_data))
        ]
        test_data = [
            test_data[i]
            for i in np.random.choice(len(test_data), len(test_data))
        ]

        record_id, tt, vals, mask, labels = train_data[0]
        input_dim = vals.size(-1)

        batch_size = min(min(len(dataset_obj), args.batch_size), args.n)
        train_dataloader = DataLoader(
            train_data, batch_size=batch_size, shuffle=False,
            collate_fn=lambda batch: variable_time_collate_fn_activity(
                batch, args, device, data_type="train"))
        test_dataloader = DataLoader(
            test_data, batch_size=n_samples, shuffle=False,
            collate_fn=lambda batch: variable_time_collate_fn_activity(
                batch, args, device, data_type="test"))

        data_objects = {
            "dataset_obj": dataset_obj,
            "train_dataloader": utils.inf_generator(train_dataloader),
            "test_dataloader": utils.inf_generator(test_dataloader),
            "input_dim": input_dim,
            "n_train_batches": len(train_dataloader),
            "n_test_batches": len(test_dataloader),
            "classif_per_tp": True,  # optional
            "n_labels": labels.size(-1)
        }
        return data_objects

    ##################################################################
    # Synthetic hidden-Markov-chain dataset
    if dataset_name == "markov_chain":

        def generate_data(num_samples, length):
            from hmmlearn import hmm
            model = hmm.GaussianHMM(n_components=3, covariance_type="full")
            model.startprob_ = np.array([0.6, 0.3, 0.1])
            model.transmat_ = np.array([[0.7, 0.2, 0.1],
                                        [0.3, 0.5, 0.2],
                                        [0.3, 0.3, 0.4]])
            model.means_ = np.array([[0.0, 0.0], [3.0, -3.0], [5.0, -5.0]])
            model.covars_ = np.tile(np.identity(2), (3, 1, 1))
            dataset = []
            for i in range(num_samples):
                X, Z = model.sample(length)
                X = np.reshape(X[:, 0], (length, 1))
                dataset.append(torch.tensor(X))
            return torch.stack(dataset)

        dataset = generate_data(args.n, n_total_tp).float()
        time_steps_extrap = torch.arange(n_total_tp).float()

        dataset = dataset.to(device)
        time_steps_extrap = time_steps_extrap.to(device)

        train_y, test_y = utils.split_train_test(dataset, train_fraq=0.8)

        n_samples = len(dataset)
        input_dim = dataset.size(-1)

        if args.mcar or args.mnar:
            collate_fn_train = lambda batch: data_ampute_batch_collate(
                batch, time_steps_extrap, args, device, data_type="train")
            collate_fn_test = lambda batch: data_ampute_batch_collate(
                batch, time_steps_extrap, args, device, data_type="test")
        else:
            collate_fn_train = lambda batch: basic_collate_fn(
                batch, time_steps_extrap, data_type="train")
            collate_fn_test = lambda batch: basic_collate_fn(
                batch, time_steps_extrap, data_type="test")

        batch_size = min(args.batch_size, args.n)
        train_dataloader = DataLoader(train_y, batch_size=batch_size,
                                      shuffle=False, collate_fn=collate_fn_train)
        test_dataloader = DataLoader(test_y, batch_size=args.n,
                                     shuffle=False, collate_fn=collate_fn_test)

        data_objects = {
            # "dataset_obj": dataset_obj,
            "train_dataloader": utils.inf_generator(train_dataloader),
            "test_dataloader": utils.inf_generator(test_dataloader),
            "input_dim": input_dim,
            "n_train_batches": len(train_dataloader),
            "n_test_batches": len(test_dataloader)
        }
        return data_objects

    ########### 1d datasets ###########

    # Sampling args.timepoints time points in the interval [0, args.max_t].
    # Sample points for both the training sequence and extrapolation (test)
    distribution = uniform.Uniform(torch.Tensor([0.0]),
                                   torch.Tensor([max_t_extrap]))
    time_steps_extrap = distribution.sample(torch.Size([n_total_tp - 1]))[:, 0]
    time_steps_extrap = torch.cat((torch.Tensor([0.0]), time_steps_extrap))
    time_steps_extrap = torch.sort(time_steps_extrap)[0]

    dataset_obj = None
    ##################################################################
    # Sample a periodic function
    if dataset_name == "periodic":
        dataset_obj = Periodic_1d(init_freq=None, init_amplitude=1.,
                                  final_amplitude=1., final_freq=None, z0=1.)

    ##################################################################

    if dataset_obj is None:
        raise Exception("Unknown dataset: {}".format(dataset_name))

    # def make_quick_plot(data, time_steps_extrap, index):
    #     import matplotlib.pyplot as plt
    #     plt.figure()
    #     plt.scatter(time_steps_extrap, data[index, :, 0])
    #     plt.title(f"Dataset at index {index}")
    #     plt.savefig(f"dataset_at_index_{index}.png")

    dataset = dataset_obj.sample_traj(time_steps_extrap, n_samples=args.n,
                                      noise_weight=args.noise_weight)
    # for i in range(3):
    #     make_quick_plot(dataset, time_steps_extrap, i)

    # Process small datasets
    dataset = dataset.to(device)
    time_steps_extrap = time_steps_extrap.to(device)

    train_y, test_y = utils.split_train_test(dataset, train_fraq=0.8)

    n_samples = len(dataset)
    input_dim = dataset.size(-1)

    if args.mcar or args.mnar:
        collate_fn_train = lambda batch: data_ampute_batch_collate(
            batch, time_steps_extrap, args, device, data_type="train")
        collate_fn_test = lambda batch: data_ampute_batch_collate(
            batch, time_steps_extrap, args, device, data_type="test")
    else:
        collate_fn_train = lambda batch: basic_collate_fn(
            batch, time_steps_extrap, data_type="train")
        collate_fn_test = lambda batch: basic_collate_fn(
            batch, time_steps_extrap, data_type="test")

    batch_size = min(args.batch_size, args.n)
    train_dataloader = DataLoader(train_y, batch_size=batch_size,
                                  shuffle=False, collate_fn=collate_fn_train)
    test_dataloader = DataLoader(test_y, batch_size=args.n,
                                 shuffle=False, collate_fn=collate_fn_test)

    data_objects = {
        # "dataset_obj": dataset_obj,
        "train_dataloader": utils.inf_generator(train_dataloader),
        "test_dataloader": utils.inf_generator(test_dataloader),
        "input_dim": input_dim,
        "n_train_batches": len(train_dataloader),
        "n_test_batches": len(test_dataloader)
    }
    return data_objects
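# The data_ampute_batch_collate used for the --mcar/--mnar branches above is
# defined elsewhere in the repo. A minimal sketch of what such a collate could
# look like; `args.missing_rate` is a hypothetical field naming the target
# fraction of missing values, and the MNAR rule below is purely illustrative:
def data_ampute_batch_collate(batch, time_steps, args, device, data_type="train"):
    batch = torch.stack(batch).to(device)
    if args.mcar:
        # MCAR: each observation goes missing independently of its value.
        keep = torch.rand_like(batch) >= args.missing_rate
    else:
        # MNAR: make larger values more likely to go missing.
        drop_prob = args.missing_rate * torch.sigmoid(batch - batch.mean())
        keep = torch.rand_like(batch) >= drop_prob
    mask = keep.float()
    data_dict = {"data": batch * mask, "time_steps": time_steps, "mask": mask}
    # Reuse the same train/test subsampling path as basic_collate_fn.
    return utils.split_and_subsample_batch(data_dict, args, data_type=data_type)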
def parse_datasets(args, device):

    def basic_collate_fn(batch, time_steps, args=args, data_type='train'):
        batch = torch.stack(batch)
        data_dict = {'data': batch, 'time_steps': time_steps}
        data_dict = utils.split_and_subsample_batch(data_dict, args, data_type=data_type)
        return data_dict

    dataset_name = args.dataset
    n_total_tp = args.timepoints + args.extrap
    max_t_extrap = args.max_t / args.timepoints * n_total_tp

    if dataset_name == 'hopper':
        dataset_obj = HopperPhysics(root='data', download=True, generate=False, device=device)
        dataset = dataset_obj.get_dataset()[:args.n]
        dataset = dataset.to(device)

        n_tp_data = dataset[:].shape[1]

        time_steps = torch.arange(start=0, end=n_tp_data, step=1).float().to(device)
        time_steps = time_steps / len(time_steps)

        dataset = dataset.to(device)
        time_steps = time_steps.to(device)

        if not args.extrap:
            n_traj = len(dataset)
            n_tp_data = dataset.shape[1]
            n_reduced_tp = args.timepoints

            start_ind = np.random.randint(0, high=n_tp_data - n_reduced_tp + 1, size=n_traj)
            end_ind = start_ind + n_reduced_tp
            sliced = []
            for i in range(n_traj):
                sliced.append(dataset[i, start_ind[i]:end_ind[i], :])
            dataset = torch.stack(sliced).to(device)
            time_steps = time_steps[:n_reduced_tp]

        train_y, test_y = utils.split_train_test(dataset, train_fraq=0.8)

        n_samples = len(dataset)
        input_dim = dataset.size(-1)

        batch_size = min(args.batch_size, args.n)
        train_dataloader = DataLoader(
            train_y, batch_size=batch_size, shuffle=False,
            collate_fn=lambda batch: basic_collate_fn(batch, time_steps, data_type="train"))
        test_dataloader = DataLoader(
            test_y, batch_size=n_samples, shuffle=False,
            collate_fn=lambda batch: basic_collate_fn(batch, time_steps, data_type="test"))

        data_objects = {
            "dataset_obj": dataset_obj,
            "train_dataloader": utils.inf_generator(train_dataloader),
            "test_dataloader": utils.inf_generator(test_dataloader),
            "input_dim": input_dim,
            "n_train_batches": len(train_dataloader),
            "n_test_batches": len(test_dataloader)
        }
        return data_objects

    ########### 1d datasets ###########

    # Sampling args.timepoints time points in the interval [0, args.max_t].
    # Sample points for both the training sequence and extrapolation (test)
    distribution = uniform.Uniform(torch.Tensor([0.0]),
                                   torch.Tensor([max_t_extrap]))
    time_steps_extrap = distribution.sample(torch.Size([n_total_tp - 1]))[:, 0]
    time_steps_extrap = torch.cat((torch.Tensor([0.0]), time_steps_extrap))
    time_steps_extrap = torch.sort(time_steps_extrap)[0]

    dataset_obj = None
    ##################################################################
    # Sample a periodic function
    if dataset_name == "periodic":
        dataset_obj = Periodic_1d(init_freq=None, init_amplitude=1.,
                                  final_amplitude=1., final_freq=None, z0=1.)

    if dataset_obj is None:
        raise Exception("Unknown dataset: {}".format(dataset_name))

    dataset = dataset_obj.sample_traj(time_steps_extrap, n_samples=args.n,
                                      noise_weight=args.noise_weight)

    # Process small datasets
    dataset = dataset.to(device)
    time_steps_extrap = time_steps_extrap.to(device)

    train_y, test_y = utils.split_train_test(dataset, train_fraq=0.8)

    n_samples = len(dataset)
    input_dim = dataset.size(-1)

    batch_size = min(args.batch_size, args.n)
    train_dataloader = DataLoader(
        train_y, batch_size=batch_size, shuffle=False,
        collate_fn=lambda batch: basic_collate_fn(batch, time_steps_extrap, data_type="train"))
    test_dataloader = DataLoader(
        test_y, batch_size=args.n, shuffle=False,
        collate_fn=lambda batch: basic_collate_fn(batch, time_steps_extrap, data_type="test"))

    data_objects = {
        # "dataset_obj": dataset_obj,
        "train_dataloader": utils.inf_generator(train_dataloader),
        "test_dataloader": utils.inf_generator(test_dataloader),
        "input_dim": input_dim,
        "n_train_batches": len(train_dataloader),
        "n_test_batches": len(test_dataloader)
    }
    return data_objects
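# Hedged usage sketch: how the data_objects returned by parse_datasets are
# typically consumed in a training loop. `args.niters` is an assumed field;
# the infinite generators never raise StopIteration.
data_obj = parse_datasets(args, device)
n_batches = data_obj["n_train_batches"]
for itr in range(args.niters * n_batches):
    batch_dict = next(data_obj["train_dataloader"])
    # batch_dict carries the "data"/"time_steps" entries produced by the
    # collate functions above; pass it to the model's training step here.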
time_steps_extrap = time_steps_extrap.to(device)

train_y, test_y = utils.split_train_test(dataset, train_fraq=0.8)

n_samples = len(dataset)
input_dim = dataset.size(-1)

batch_size = min(args.batch_size, args.n)
train_dataloader = DataLoader(train_y, batch_size=batch_size, shuffle=False,
                              collate_fn=lambda batch: variable_time_collate_fn_activity(batch, args, device, data_type="train"))
test_dataloader = DataLoader(test_y, batch_size=n_samples, shuffle=False,
                             collate_fn=lambda batch: variable_time_collate_fn_activity(batch, args, device, data_type="test"))

data_obj = {"dataset_obj": dataset,
            "train_dataloader": utils.inf_generator(train_dataloader),
            "test_dataloader": utils.inf_generator(test_dataloader),
            "input_dim": input_dim,
            "n_train_batches": len(train_dataloader),
            "n_test_batches": len(test_dataloader)}

# Create the model
obsrv_std = 0.01
if args.dataset == "hopper":
    obsrv_std = 1e-3

obsrv_std = torch.Tensor([obsrv_std]).to(device)
z0_prior = Normal(torch.Tensor([0.0]).to(device), torch.Tensor([1.]).to(device))

if args.rnn_vae: