def fit(self, n_output_node, input_shape, train_data, test_data, time_limit=24 * 60 * 60):
    """Run the architecture search until the time budget or the model cap is reached.

    Args:
        n_output_node: An integer, the number of output nodes in the final layer.
        input_shape: A tuple. Its first element is dropped and the remainder is
            handed to the searcher as the input shape.
        train_data: A PyTorch DataLoader instance holding the training data.
        test_data: A PyTorch DataLoader instance holding the testing data.
        time_limit: The time budget for the search, in seconds.

    Raises:
        TimeoutError: If the budget is exhausted (or a TimeoutError escapes the
            search) while the stored searcher's history is still empty.
    """
    # First call only: build the searcher and persist it on disk.
    if not self.searcher:
        self.searcher_args.update({
            'n_output_node': n_output_node,
            'input_shape': input_shape[1:],
            'path': self.path,
            'metric': self.metric,
            'loss': self.loss,
            'verbose': self.verbose,
        })
        self._save_searcher(Searcher(**self.searcher_args))
        # Only a flag is kept in memory; the searcher itself is reloaded from disk.
        self.searcher = True

    began_at = time.time()
    budget_left = time_limit
    try:
        while budget_left > 0:
            stored = pickle_from_file(os.path.join(self.path, 'searcher'))
            stored.search(train_data, test_data, int(budget_left))
            if len(self._load_searcher().history) >= Constant.MAX_MODEL_NUM:
                break
            budget_left = time_limit - (time.time() - began_at)
        # Budget used up: route through the same handler as a timeout raised
        # from inside the search itself.
        if budget_left <= 0:
            raise TimeoutError
    except TimeoutError:
        found_so_far = len(self._load_searcher().history)
        if found_so_far == 0:
            raise TimeoutError(
                "Search Time too short. No model was found during the search time."
            )
        if self.verbose:
            print('Time is out.')
def fit(self, n_output_node, input_shape, train_data, test_data, time_limit=24 * 60 * 60):
    """Run the architecture search until the time budget or the model cap is reached.

    Args:
        n_output_node: An integer, the number of output nodes in the final layer.
        input_shape: A tuple. Its first element is dropped and the remainder is
            handed to the searcher as the input shape.
        train_data: The training data, forwarded unchanged to each search round.
        test_data: The testing data, forwarded unchanged to each search round.
        time_limit: The time budget for the search, in seconds.

    Raises:
        TimeoutError: If the budget is exhausted (or a TimeoutError escapes the
            search) while the stored searcher's history is still empty.
    """
    # First call only: build the searcher and persist it on disk.
    if not self.searcher:
        self.searcher_args.update({
            'n_output_node': n_output_node,
            'input_shape': input_shape[1:],
            'path': self.path,
            'metric': self.metric,
            'loss': self.loss,
            'verbose': self.verbose,
        })
        self._save_searcher(Searcher(**self.searcher_args))
        # Only a flag is kept in memory; the searcher itself lives on disk.
        self.searcher = True

    began_at = time.time()
    budget_left = time_limit
    try:
        while budget_left > 0:
            _run_searcher_once(train_data, test_data, self.path, int(budget_left))
            if len(self._load_searcher().history) >= Constant.MAX_MODEL_NUM:
                break
            budget_left = time_limit - (time.time() - began_at)
        # Budget used up: route through the same handler as a timeout raised
        # from inside the search itself.
        if budget_left <= 0:
            raise TimeoutError
    except TimeoutError:
        found_so_far = len(self._load_searcher().history)
        if found_so_far == 0:
            raise TimeoutError(
                "Search Time too short. No model was found during the search time."
            )
        if self.verbose:
            print('Time is out.')
class NetworkModule:
    """Class to create a network module.

    Attributes:
        loss: A function taking two parameters, the predictions and the ground truth.
        metric: An instance of the Metric subclasses.
        searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
        searcher: An instance of the Searcher class, or None until `fit` is first called.
        path: A string. The path to the directory to save the searcher.
        verbose: A boolean. Setting it to true prints to stdout.
        generators: A list of instances of the NetworkGenerator class or its subclasses.
    """

    def __init__(self, loss, metric, searcher_args, path, verbose=False):
        """Store the configuration; the searcher itself is created lazily in `fit`.

        Args:
            loss: A function taking two parameters, the predictions and the ground truth.
            metric: An instance of the Metric subclasses.
            searcher_args: A dictionary of extra keyword arguments for the Searcher
                constructor. It is copied; None is treated as an empty dictionary.
            path: A string. The directory in which the module is pickled.
            verbose: A boolean. Setting it to true prints to stdout.
        """
        # Shallow copy: fit() injects its own keys, and those must not leak into
        # the dictionary owned by the caller.
        self.searcher_args = dict(searcher_args) if searcher_args is not None else {}
        self.searcher = None
        self.path = path
        self.verbose = verbose
        self.loss = loss
        self.metric = metric
        self.generators = []

    def fit(self, n_output_node, input_shape, train_data, test_data, time_limit=24 * 60 * 60):
        """Search the best network.

        Args:
            n_output_node: An integer, the number of output nodes in the final layer.
            input_shape: A tuple. Its first element is dropped and the remainder is
                handed to the searcher as the input shape.
            train_data: A PyTorch DataLoader instance representing the training data.
            test_data: A PyTorch DataLoader instance representing the testing data.
            time_limit: An integer, the time limit in seconds on searching for models.

        Raises:
            TimeoutError: If the time limit is exhausted (or a TimeoutError escapes
                the search) while the searcher's history is still empty.
        """
        # Create the searcher and save the module on disk (first call only).
        if not self.searcher:
            input_shape = input_shape[1:]
            self.searcher_args['n_output_node'] = n_output_node
            self.searcher_args['input_shape'] = input_shape
            self.searcher_args['path'] = self.path
            self.searcher_args['metric'] = self.metric
            self.searcher_args['loss'] = self.loss
            self.searcher_args['generators'] = self.generators
            self.searcher_args['verbose'] = self.verbose
            self.searcher = Searcher(**self.searcher_args)
            pickle_to_file(self, os.path.join(self.path, 'module'))

        start_time = time.time()
        time_remain = time_limit
        try:
            while time_remain > 0:
                self.searcher.search(train_data, test_data, int(time_remain))
                # Persist progress after every search round.
                pickle_to_file(self, os.path.join(self.path, 'module'))
                if len(self.searcher.history) >= Constant.MAX_MODEL_NUM:
                    break
                time_elapsed = time.time() - start_time
                time_remain = time_limit - time_elapsed
            # if no search executed during the time_limit, then raise an error
            if time_remain <= 0:
                raise TimeoutError
        except TimeoutError:
            if len(self.searcher.history) == 0:
                raise TimeoutError(
                    "Search Time too short. No model was found during the search time."
                )
            elif self.verbose:
                print('Time is out.')

    def final_fit(self, train_data, test_data, trainer_args=None, retrain=False):
        """Final training after the best architecture has been found.

        Args:
            train_data: A DataLoader instance representing the training data.
            test_data: A DataLoader instance representing the testing data.
            trainer_args: A dictionary containing the parameters of the ModelTrainer constructor.
            retrain: A boolean of whether to reinitialize the weights of the model.
        """
        graph = self.searcher.load_best_model()

        if retrain:
            graph.weighted = False
        # Only the third element returned by train() is used here.
        _, _1, graph = train(None, graph, train_data, test_data, trainer_args,
                             self.metric, self.loss, self.verbose, self.path)
        self.searcher.replace_model(graph, self.searcher.get_best_model_id())
        pickle_to_file(self, os.path.join(self.path, 'module'))

    @property
    def best_model(self):
        """Return whatever the searcher's `load_best_model` yields."""
        return self.searcher.load_best_model()
def fit(self, x_train=None, y_train=None, time_limit=None):
    """Find the best neural architecture and train it.

    Based on the given dataset, the function will find the best neural architecture for it.
    The dataset is in numpy.ndarray format, so the training data should be passed
    through `x_train` and `y_train`.

    Args:
        x_train: A numpy.ndarray instance containing the training data.
        y_train: A numpy.ndarray instance containing the labels of the training data.
            It is flattened before use.
        time_limit: The time limit for the search in seconds. None means 24 hours.

    Raises:
        TimeoutError: If the time limit is exhausted (or a TimeoutError escapes the
            search) while the stored searcher's history is still empty.
    """
    if y_train is None:
        y_train = []
    if x_train is None:
        x_train = []

    x_train = np.array(x_train)
    y_train = np.array(y_train).flatten()

    _validate(x_train, y_train)

    y_train = self.transform_y(y_train)

    # Transform x_train
    if self.data_transformer is None:
        self.data_transformer = DataTransformer(x_train, augment=self.augment)

    # Create the searcher and save on disk
    if not self.searcher:
        input_shape = x_train.shape[1:]
        self.searcher_args['n_output_node'] = self.get_n_output_node()
        self.searcher_args['input_shape'] = input_shape
        self.searcher_args['path'] = self.path
        self.searcher_args['metric'] = self.metric
        self.searcher_args['loss'] = self.loss
        self.searcher_args['verbose'] = self.verbose
        searcher = Searcher(**self.searcher_args)
        self.save_searcher(searcher)
        # Only a flag is kept in memory; the searcher itself is reloaded from disk.
        self.searcher = True

    # Divide training data into training and testing data.
    x_train, x_test, y_train, y_test = train_test_split(
        x_train, y_train,
        test_size=min(Constant.VALIDATION_SET_SIZE, int(len(y_train) * 0.2)),
        random_state=42)

    # Wrap the data into DataLoaders
    train_data = self.data_transformer.transform_train(x_train, y_train)
    test_data = self.data_transformer.transform_test(x_test, y_test)

    # Save the classifier. A single write through the project helper; the former
    # extra pickle.dump(open(...)) to the same path left its file handle unclosed.
    pickle_to_file(self, os.path.join(self.path, 'classifier'))

    if time_limit is None:
        time_limit = 24 * 60 * 60

    start_time = time.time()
    time_remain = time_limit
    try:
        while time_remain > 0:
            run_searcher_once(train_data, test_data, self.path, int(time_remain))
            if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                break
            time_elapsed = time.time() - start_time
            time_remain = time_limit - time_elapsed
        # if no search executed during the time_limit, then raise an error
        if time_remain <= 0:
            raise TimeoutError
    except TimeoutError:
        if len(self.load_searcher().history) == 0:
            raise TimeoutError(
                "Search Time too short. No model was found during the search time."
            )
        elif self.verbose:
            print('Time is out.')
def fit_dataset(self, train_root, train_csv_file, test_root, test_csv_file, time_limit=None):
    """Run the architecture search on datasets described by CSV files.

    Args:
        train_root: Passed to `MyData` as `root` for the training dataset.
        train_csv_file: Passed to `MyData` as `csv_file` for the training dataset.
        test_root: Passed to `MyData` as `root` for the testing dataset.
        test_csv_file: Passed to `MyData` as `csv_file` for the testing dataset.
        time_limit: The time limit for the search in seconds. None means 24 hours.

    Raises:
        TimeoutError: If the time limit is exhausted (or a TimeoutError escapes the
            search) while the stored searcher's history is still empty.
    """
    # loading data
    train_dataset = MyData(csv_file=train_csv_file, root=train_root, test=False)
    train_data = torch.utils.data.DataLoader(
        train_dataset,  # TODO
        batch_size=8,
        shuffle=True,
        pin_memory=True)

    test_dataset = MyData(csv_file=test_csv_file, root=test_root, test=True)
    test_data = torch.utils.data.DataLoader(
        test_dataset,  # TODO
        batch_size=8,
        shuffle=False,
        pin_memory=True)

    # Create the searcher and save on disk
    if not self.searcher:
        # NOTE(review): input shape and output node count are hard-coded here.
        input_shape = (224, 224, 3)
        self.searcher_args['n_output_node'] = 4
        self.searcher_args['input_shape'] = input_shape
        self.searcher_args['path'] = self.path
        self.searcher_args['metric'] = self.metric
        self.searcher_args['loss'] = self.loss
        self.searcher_args['verbose'] = self.verbose
        searcher = Searcher(**self.searcher_args)
        self.save_searcher(searcher)
        # Only a flag is kept in memory; the searcher itself is reloaded from disk.
        self.searcher = True

    # Save the classifier. A single write through the project helper; the former
    # extra pickle.dump(open(...)) to the same path left its file handle unclosed.
    pickle_to_file(self, os.path.join(self.path, 'classifier'))

    if time_limit is None:
        time_limit = 24 * 60 * 60

    start_time = time.time()
    time_remain = time_limit
    try:
        while time_remain > 0:
            run_searcher_once(train_data, test_data, self.path, int(time_remain))
            if len(self.load_searcher().history) >= Constant.MAX_MODEL_NUM:
                break
            time_elapsed = time.time() - start_time
            time_remain = time_limit - time_elapsed
        # if no search executed during the time_limit, then raise an error
        if time_remain <= 0:
            raise TimeoutError
    except TimeoutError:
        if len(self.load_searcher().history) == 0:
            raise TimeoutError(
                "Search Time too short. No model was found during the search time."
            )
        elif self.verbose:
            print('Time is out.')
def fit(self, x_train=None, y_train=None, batch_size=None, time_limit=None):
    """Find the best neural architecture and train it.

    Based on the given dataset, the function will find the best neural architecture for it.
    The dataset is in numpy.ndarray format, so the training data should be passed
    through `x_train` and `y_train`.

    Args:
        x_train: A numpy.ndarray instance containing the training data. When
            `self.augment` is truthy it is first run through `text_preprocess`.
        y_train: A numpy.ndarray instance containing the labels of the training data.
        batch_size: The batch size for the training DataLoader. None means
            `Constant.MAX_BATCH_SIZE`.
        time_limit: The time limit for the search in seconds. None means 24 hours.
    """
    if y_train is None:
        y_train = []
    if x_train is None:
        x_train = []

    if self.augment:
        x_train = text_preprocess(x_train, path=self.path)

    x_train = np.array(x_train)
    y_train = np.array(y_train)

    _validate(x_train, y_train)

    y_train = self.transform_y(y_train)

    if batch_size is None:
        batch_size = Constant.MAX_BATCH_SIZE

    # Create the searcher and save on disk
    if not self.searcher:
        input_shape = x_train.shape[1:]
        self.searcher_args['n_output_node'] = self.get_n_output_node()
        self.searcher_args['input_shape'] = input_shape
        self.searcher_args['path'] = self.path
        self.searcher_args['metric'] = self.metric
        self.searcher_args['loss'] = self.loss
        self.searcher_args['verbose'] = self.verbose
        searcher = Searcher(**self.searcher_args)
        self.save_searcher(searcher)
        self.searcher = True

    # Divide training data into training and testing data.
    x_train, x_test, y_train, y_test = train_test_split(
        x_train, y_train,
        test_size=min(Constant.VALIDATION_SET_SIZE, int(len(y_train) * 0.2)),
        random_state=42)

    # Wrap the data into DataLoaders
    train_data = text_dataloader(x_train, y_train, batch_size=batch_size, shuffle=True)
    # NOTE(review): the test loader is shuffled and does not receive batch_size -
    # confirm this is intended.
    test_data = text_dataloader(x_test, y_test, shuffle=True)

    # Save the classifier. A single write through the project helper; the former
    # extra pickle.dump(open(...)) to the same path left its file handle unclosed.
    pickle_to_file(self, os.path.join(self.path, 'classifier'))

    if time_limit is None:
        time_limit = 24 * 60 * 60

    # The search itself is delegated to the wrapped module; the full post-split
    # array shape is passed as its input_shape argument.
    self.cnn.fit(self.get_n_output_node(), x_train.shape, train_data, test_data, time_limit)