def train(self, dataset_path, features=None, target=None, **kwargs):
    """Fit a newly built classifier on a CSV dataset and log its train accuracy.

    Args:
        dataset_path: Location of the CSV file; handed directly to
            ``pd.read_csv``.
        features: Recorded on ``self._features``. Presumably consumed by
            ``self._extract_xy`` -- TODO confirm against that helper.
        target: Recorded on ``self._target``; same caveat as ``features``.
        **kwargs: Accepted for call compatibility; not read by this method.
    """
    # Remember the requested columns on the instance before extracting data.
    self._target = target
    self._features = features

    # Read the whole CSV into a dataframe and split it into inputs / labels.
    frame = pd.read_csv(dataset_path)
    inputs, labels = self._extract_xy(frame)

    # Categorical columns are encoded by the instance's own helper.
    inputs = self._encoding_categorical_type(inputs)

    # The classifier is rebuilt on every call, sized by the number of
    # distinct label values present in this dataset.
    n_labels = labels.unique().size
    hyperparams = (
        self.n_estimators,
        self.min_child_weight,
        self.max_depth,
        self.gamma,
        self.subsample,
        self.colsample_bytree,
    )
    self._clf = self._build_classifier(*hyperparams, n_labels)
    self._clf.fit(inputs, labels)

    # Score on the very data that was just fitted (train accuracy).
    train_accuracy = self._clf.score(inputs, labels)
    logger.log('Train accuracy: {}'.format(train_accuracy))
def train(self, dataset_uri):
    """Load a corpus and store the probability tables computed from it.

    The results of ``self._compute_probs`` are kept on ``self._trans_probs``
    and ``self._emiss_probs``; the tag count is kept on ``self._num_tags``.

    Args:
        dataset_uri: Identifier passed to
            ``dataset_utils.load_dataset_of_corpus``.
    """
    corpus = dataset_utils.load_dataset_of_corpus(dataset_uri)

    # Transpose every sentence into its parallel sequences, then transpose
    # across sentences so each sequence kind ends up in its own tuple.
    # Assumes each sentence item is a (token, tag) pair -- TODO confirm.
    per_sentence = [zip(*sentence) for sentence in corpus]
    sents_tokens, sents_tags = zip(*per_sentence)

    # Only the first entry of `tag_num_classes` is used.
    self._num_tags = corpus.tag_num_classes[0]

    probs = self._compute_probs(self._num_tags, sents_tokens, sents_tags)
    (self._trans_probs, self._emiss_probs) = probs

    logger.log('No. of tags: {}'.format(self._num_tags))
def train(self, dataset_uri):
    """Fit ``self._clf`` on an image dataset and log the train accuracy.

    Args:
        dataset_uri: Identifier passed to
            ``dataset_utils.load_dataset_of_image_files``.
    """
    samples = dataset_utils.load_dataset_of_image_files(dataset_uri)

    # Split the (image, class) pairs into two parallel tuples.
    pairs = [(img, label) for (img, label) in samples]
    images, labels = zip(*pairs)

    inputs = self._prepare_X(images)
    self._clf.fit(inputs, labels)

    # Train accuracy: fraction of training samples predicted correctly.
    # NOTE(review): `labels` is a tuple, so the element-wise comparison
    # relies on `predict` returning an array-like that supports it.
    predicted = self._clf.predict(inputs)
    n_correct = sum(labels == predicted)
    accuracy = n_correct / len(labels)
    logger.log('Train accuracy: {}'.format(accuracy))
def evaluate(self, dataset_uri):
    """Evaluate ``self._model`` on an image dataset and return its accuracy.

    Args:
        dataset_uri: Identifier passed to
            ``dataset_utils.load_dataset_of_image_files``.

    Returns:
        The second value returned by ``self._model.evaluate`` (accuracy).
        The first value (loss) is only logged.
    """
    side = self._knobs.get('image_size')
    samples = dataset_utils.load_dataset_of_image_files(
        dataset_uri, image_size=[side, side])

    # Split the (image, class) pairs and convert both sides to arrays.
    image_seq, class_seq = zip(*[(img, label) for (img, label) in samples])
    image_arr = np.asarray(image_seq)
    class_arr = np.asarray(class_seq)

    # Run under the instance's own graph and session.
    with self._graph.as_default(), self._sess.as_default():
        (loss, accuracy) = self._model.evaluate(image_arr, class_arr)
        logger.log('Test loss: {}'.format(loss))

    return accuracy
def _train(self, dataset):
    """Train a freshly created model on ``dataset`` and return it.

    Batch size and epoch count are read from ``self._knobs``. The mean batch
    loss of every epoch is reported through ``logger.log_loss``.

    Args:
        dataset: Sized collection that ``self._prepare_batch`` can slice by
            start / end index.

    Returns:
        tuple: ``(net, optimizer)`` as produced by ``self._create_model``,
        with ``net`` moved to CUDA when CUDA is available.
    """
    batch_size = self._knobs.get('batch_size')
    n_epochs = self._knobs.get('epochs')
    n_batches = math.ceil(len(dataset) / batch_size)

    # Tag index excluded from the loss (comes from padding of sentences
    # during batching).
    ignored_tag = self._tag_count

    # Define 2 plots: loss against time, loss against epochs
    logger.define_loss_plot()
    logger.define_plot('Loss Over Time', ['loss'])

    (net, optimizer) = self._create_model()

    # Pick the tensor constructor matching the device the model lives on.
    tensor_type = torch.LongTensor
    if torch.cuda.is_available():
        logger.log('Using CUDA...')
        net = net.cuda()
        tensor_type = torch.cuda.LongTensor

    criterion = nn.CrossEntropyLoss(ignore_index=ignored_tag)

    for epoch in range(n_epochs):
        epoch_loss = 0
        for batch_idx in range(n_batches):
            start = batch_idx * batch_size
            (words_tsr, tags_tsr) = self._prepare_batch(
                dataset, start, start + batch_size, tensor_type)

            # Clear gradients left over from the previous batch.
            optimizer.zero_grad()

            # Forward pass.
            scores = net(words_tsr)

            # Collapse the first two dimensions so every word is one row
            # for the loss, with tags flattened to match.
            n_rows = scores.size(0) * scores.size(1)
            batch_loss = criterion(scores.view(n_rows, -1), tags_tsr.view(-1))

            # Backward pass, then let the optimizer update the parameters.
            batch_loss.backward()
            optimizer.step()

            epoch_loss += batch_loss.item()

        logger.log_loss(loss=(epoch_loss / n_batches), epoch=epoch)

    return (net, optimizer)
def train(self, dataset_path, features=None, target=None):
    """Fit ``self._clf`` on a tabular dataset and log the train RMSE.

    Args:
        dataset_path: Identifier passed to
            ``dataset_utils.load_dataset_of_tabular``.
        features: Column selector for the inputs. When ``None``, every
            column except the last is used.
        target: Column selector for the labels. When ``None``, the last
            column is used.
    """
    table = dataset_utils.load_dataset_of_tabular(dataset_path).data

    # Fall back to "all but last" / "last" columns when no selector is given.
    inputs = table.iloc[:, :-1] if features is None else table[features]
    labels = table.iloc[:, -1] if target is None else table[target]

    # Categorical columns are encoded by the instance's own helper, which
    # also receives the labels.
    inputs = self._category_encoding_type(inputs, labels)

    self._clf.fit(inputs, labels)

    # Root mean square error on the data that was just fitted.
    predicted = self._clf.predict(inputs)
    train_rmse = np.sqrt(mean_squared_error(labels, predicted))
    logger.log('Train RMSE: {}'.format(train_rmse))
def train(self, dataset_path, features=None, target=None, **kwargs):
    """Fit ``self._clf`` on a CSV dataset and log the train RMSE.

    Args:
        dataset_path: Location of the CSV file; handed directly to
            ``pd.read_csv``.
        features: Recorded on ``self._features``. Presumably consumed by
            ``self._extract_xy`` -- TODO confirm against that helper.
        target: Recorded on ``self._target``; same caveat as ``features``.
        **kwargs: Accepted for call compatibility; not read by this method.
    """
    # Remember the requested columns on the instance before extracting data.
    self._target = target
    self._features = features

    # Read the whole CSV into a dataframe and split it into inputs / labels.
    frame = pd.read_csv(dataset_path)
    inputs, labels = self._extract_xy(frame)

    # Categorical columns are encoded by the instance's own helper.
    inputs = self._encoding_categorical_type(inputs)

    self._clf.fit(inputs, labels)

    # Root mean square error on the data that was just fitted.
    predicted = self._clf.predict(inputs)
    train_rmse = np.sqrt(mean_squared_error(labels, predicted))
    logger.log('Train RMSE: {}'.format(train_rmse))
def train(self, dataset_uri):
    """Build and fit ``self._model`` on an image dataset, logging train metrics.

    Image size, batch size and epoch count are read from ``self._knobs``.

    Args:
        dataset_uri: Identifier passed to
            ``dataset_utils.load_dataset_of_image_files``.
    """
    side = self._knobs.get('image_size')
    batch_size = self._knobs.get('batch_size')
    n_epochs = self._knobs.get('epochs')

    devices = device_lib.list_local_devices()
    logger.log('Available devices: {}'.format(str(devices)))

    # Define 2 plots: loss against time, loss against epochs
    logger.define_loss_plot()
    logger.define_plot('Loss Over Time', ['loss'])

    samples = dataset_utils.load_dataset_of_image_files(
        dataset_uri, image_size=[side, side])
    num_classes = samples.classes

    # Split the (image, class) pairs and convert both sides to arrays.
    image_seq, class_seq = zip(*[(img, label) for (img, label) in samples])
    image_arr = np.asarray(image_seq)
    class_arr = np.asarray(class_seq)

    with self._graph.as_default():
        # The model is built inside the instance's graph, before the
        # session is made the default one.
        self._model = self._build_model(num_classes)

        with self._sess.as_default():
            # Route per-epoch events to the instance's own handler.
            epoch_hook = tf.keras.callbacks.LambdaCallback(
                on_epoch_end=self._on_train_epoch_end)
            self._model.fit(
                image_arr,
                class_arr,
                verbose=0,
                epochs=n_epochs,
                batch_size=batch_size,
                callbacks=[epoch_hook],
            )

            # Re-evaluate on the training data to report train metrics.
            (loss, accuracy) = self._model.evaluate(image_arr, class_arr)
            logger.log('Train loss: {}'.format(loss))
            logger.log('Train accuracy: {}'.format(accuracy))
def _predict(self, dataset):
    """Predict a sequence of tag indices for every sentence in ``dataset``.

    The dataset is processed in batches whose size is read from
    ``self._knobs``. For each sentence, the highest-scoring tag index per
    word is collected until the first null word is met.

    Args:
        dataset: Sized collection that ``self._prepare_batch`` can slice by
            start / end index.

    Returns:
        list: One list of predicted tag indices (Python ints) per sentence,
        in the order the batches produce them.
    """
    N = self._knobs.get('batch_size')
    net = self._net
    B = math.ceil(len(dataset) / N)  # No. of batches

    # A word index equal to the vocabulary size ends a sentence; presumably
    # this is the padding index used by `_prepare_batch` -- TODO confirm.
    null_word = len(self._word_dict)

    Tensor = torch.LongTensor
    if torch.cuda.is_available():
        logger.log('Using CUDA...')
        net = net.cuda()
        Tensor = torch.cuda.LongTensor

    # NOTE(review): the network is not switched to eval mode here; if it
    # contains dropout / batch-norm layers the caller may want `net.eval()`.
    sents_pred_tags = []
    for i in range(B):
        start = i * N

        # Extract batch from dataset
        (words_tsr, _) = self._prepare_batch(dataset, start, start + N, Tensor, has_tags=False)

        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            probs_tsr = net(words_tsr)

            # Index of the highest-scoring tag for every word
            _, preds_tsr = torch.max(probs_tsr, dim=2)

        # Convert once per batch rather than calling `.item()` per element.
        batch_preds = preds_tsr.tolist()
        batch_words = words_tsr.tolist()

        # Populate predictions, cutting each sentence at its first null word
        for (sent_preds, sent_words) in zip(batch_preds, batch_words):
            sent_pred_tags = []
            for (pred, word) in zip(sent_preds, sent_words):
                if word == null_word:
                    break
                sent_pred_tags.append(pred)
            sents_pred_tags.append(sent_pred_tags)

    return sents_pred_tags
def train(self, dataset_path, features=None, target=None):
    """Fit a newly built classifier on a tabular dataset and log train accuracy.

    Args:
        dataset_path: Identifier passed to
            ``dataset_utils.load_dataset_of_tabular``.
        features: Column selector for the inputs. When ``None``, every
            column except the last is used.
        target: Column selector for the labels. When ``None``, the last
            column is used.
    """
    table = dataset_utils.load_dataset_of_tabular(dataset_path).data

    # Fall back to "all but last" / "last" columns when no selector is given.
    inputs = table.iloc[:, :-1] if features is None else table[features]
    labels = table.iloc[:, -1] if target is None else table[target]

    # Categorical columns are encoded by the instance's own helper, which
    # also receives the labels.
    inputs = self._category_encoding_type(inputs, labels)

    # The classifier is rebuilt on every call, sized by the number of
    # distinct label values present in this dataset.
    n_labels = labels.unique().size
    hyperparams = (
        self.n_estimators,
        self.min_child_weight,
        self.max_depth,
        self.gamma,
        self.subsample,
        self.colsample_bytree,
    )
    self._clf = self._build_classifier(*hyperparams, n_labels)
    self._clf.fit(inputs, labels)

    # Score on the very data that was just fitted (train accuracy).
    train_accuracy = self._clf.score(inputs, labels)
    logger.log('Train accuracy: {}'.format(train_accuracy))
def train(self, dataset_uri):
    """Load a corpus, train the network on it and log the train accuracy.

    Sets ``self._word_dict``, ``self._tag_count``, ``self._net`` and
    ``self._optimizer`` as side effects.

    Args:
        dataset_uri: Identifier passed to
            ``dataset_utils.load_dataset_of_corpus``.
    """
    corpus = dataset_utils.load_dataset_of_corpus(dataset_uri)

    # Vocabulary and tag count are stored on the instance before training
    # starts; only the first entry of `tag_num_classes` is used.
    self._word_dict = self._extract_word_dict(corpus)
    self._tag_count = corpus.tag_num_classes[0]

    vocab_size = len(self._word_dict)
    logger.log('No. of unique words: {}'.format(vocab_size))
    logger.log('No. of tags: {}'.format(self._tag_count))

    trained = self._train(corpus)
    (self._net, self._optimizer) = trained

    # Predict on the training corpus itself to report train accuracy.
    predicted_tags = self._predict(corpus)
    train_accuracy = self._compute_accuracy(corpus, predicted_tags)
    logger.log('Train accuracy: {}'.format(train_accuracy))