def dataset_subjects(self):
    """Partition ``self.subjects`` into train / validation / test lists.

    Reads split sizes from ``self.data["dataset_subjects"]`` and stores the
    resulting lists on ``self.train_subjets``, ``self.validation_subjets``
    and ``self.test_subjets``.
    """
    cfg = self.data["dataset_subjects"]
    # Validation split is optional: enabled when "validation_subjects" is truthy.
    self.use_validation = bool(cfg["validation_subjects"])
    # First cut: take the configured number of training subjects.
    self.train_subjets, remaining = split_data_by_len(
        self.subjects.copy_list(), cfg["train_subjects"]
    )
    # Default: everything left over is the test set.
    self.test_subjets = remaining
    if self.use_validation:
        # NOTE(review): the first part of this split (sized by
        # "test_subjects") becomes the *validation* set — the key name looks
        # swapped, but the same pattern appears elsewhere in this file; confirm.
        self.validation_subjets, self.test_subjets = split_data_by_len(
            remaining, cfg["test_subjects"]
        )
# Load every experiment found under exp_path and build subject objects.
# NOTE(review): exp_path is not assigned in this chunk — it is presumably
# defined earlier in the file; confirm before running this section alone.
experiments_paths = get_paths_experiment(exp_path)
experiments = {folder: Experiment(folder, file_paths)
               for folder, file_paths in experiments_paths.items()}
subjects = experiment_to_subject(experiments)

# Working copy of the subject list; the tokenizer is built from it first.
shuffle_subjets = subjects.copy_list()
tokenizer = Tokenizer(shuffle_subjets, window_size=1024, stride=512)
# NOTE(review): the list is shuffled *after* being handed to Tokenizer — if
# Tokenizer keeps a reference rather than copying, its internal order changes
# here; verify that is intended.
shuffle(shuffle_subjets)

channel_iters = 80

# Split off the first 18 subjects as training; the remainder feeds the
# data generator below.
train_subjets, other_subjets = split_data_by_len(shuffle_subjets, 18)

# Available dataset builders on DataGen:
#   get_consecutive_dataset / get_same_channel_dataset / get_same_subject_dataset

# Train dataset: binary target encoding for the generator.
target_cod = {"positive": 1, "negative": 0}
train_data_generator = DataGen(
    other_subjets,
    tokenizer,
    combinate_subjects=True,
    channels_iter=channel_iters,
    targets_cod=target_cod,
)
data_train = train_data_generator.get_tiny_custom_channel_dataset(50)
print("Entrenamiento")
print(train_data_generator.dataset_metadata)
# Experiment root directory (Windows path on the author's machine).
exp_path = "C:/Users/migue/Documents/GitHub/Datasets/Experimento"

# Discover experiment folders/files and wrap each one in an Experiment.
experiments_paths = get_paths_experiment(exp_path)
experiments = {folder: Experiment(folder, file_paths)
               for folder, file_paths in experiments_paths.items()}
subjects = experiment_to_subject(experiments)

# Shuffle a working copy of the subjects before splitting.
shuffle_subjets = subjects.copy_list()
shuffle(shuffle_subjets)
print("Se Cargaron los datos")

# 14 subjects for training, then 3 for validation, rest for test.
train_subjets, other_subjets = split_data_by_len(shuffle_subjets, 14)
validation_subjets, test_subjets = split_data_by_len(other_subjets, 3)

# Tokenizer fitted on the test subjects with a half-overlapping window.
tokenizer = Tokenizer(test_subjets, window_size=1024, stride=256)

# Available dataset builders on DataGen:
#   get_consecutive_dataset / get_same_channel_dataset / get_same_subject_dataset
# Dataset construction is currently disabled:
#train_data_generator = DataGen(train_subjets, tokenizer)
#data_train = train_data_generator.get_same_channel_dataset()
#validation_data_generator = DataGen(validation_subjets, tokenizer)
#data_validation = validation_data_generator.get_same_channel_dataset()
def dataset_subjects(self):
    """Split ``self.subjects`` into train, validation and test lists.

    Split sizes come from ``self.data["dataset_subjects"]``; results are
    stored on ``self.train_subjets``, ``self.validation_subjets`` and
    ``self.test_subjets``.
    """
    cfg = self.data["dataset_subjects"]
    # Carve off the training subjects first.
    self.train_subjets, leftover = split_data_by_len(
        self.subjects.copy_list(), cfg["train_subjects"]
    )
    # NOTE(review): "test_subjects" sizes the *validation* portion here —
    # key name looks swapped but matches the rest of the file; confirm.
    self.validation_subjets, self.test_subjets = split_data_by_len(
        leftover, cfg["test_subjects"]
    )