def train_loop(self, input_fn):
    # Flat form: a plain list of metric names sharing one averaging window.
    metric_container = MetricContainer(metrics=['1', 'b', 'c'],
                                       track_average_epoch_count=5)
    # Grouped form (this rebinds metric_container, replacing the one above):
    # per-group settings override the container-level track_average_epoch_count.
    metric_container = MetricContainer(metrics=[{
        'metrics': ['a', 'b', 'c']
    }, {
        'metrics': ['2', 'd', 'e'],
        'track_average_epoch_count': 10
    }], track_average_epoch_count=5)
    self.log("calling input fn")
    input_fn()
    for epoch in iterator(range(6)):
        for idx in iterator(range(6), 2):
            metric_container.a.update(idx)
            metric_container.b.update(idx * 2)
            self.log("Epoch: {} step: {}".format(epoch, idx))
            self.log("a {}".format(metric_container.a.avg()))
            self.log("b {}".format(metric_container.b.avg()))
            if idx % 3 == 0:
                metric_container.reset()
        metric_container.log_metrics(['a', '2'])
        metric_container.reset_epoch()
    metric_container.log_metrics()
    self.log("trained: {}".format(self.model.train()))
    self.copy_related_files("experiments/exports")
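# The loop above leans on a metric API: update(value, count), avg(), reset()
# between logging windows, and reset_epoch() at epoch boundaries. The class
# below is a minimal illustrative stand-in for those semantics, assuming a
# running sum/count average; it is NOT MetricContainer's actual implementation.
class _SketchMetric:
    def __init__(self):
        self.total = 0.0
        self.count = 0

    def update(self, value, count=1):
        # Accumulate `value` weighted by `count` (e.g. a batch size).
        self.total += value * count
        self.count += count

    def avg(self):
        # Mean of everything seen since the last reset().
        return self.total / self.count if self.count else 0.0

    def reset(self):
        self.total, self.count = 0.0, 0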
def evaluate_loop(self, input_fn, **kwargs):
    metricContainer = MetricContainer([
        'test_accuracy', 'test_oov_accuracy',
        'test_utt_accuracy', 'test_oov_utt_accuracy'
    ])
    # input_fn is a (test_data, test_oov_data) pair here, not a callable.
    test_data, test_oov_data = input_fn
    # print(*test_data, *test_oov_data, sep='\n')
    self.log("Testing on test_data")
    for data in iterator(tqdm(test_data), 50):
        output = self._evaluate_dialogue(data)
        for _, utterance in data[0].iterrows():
            if utterance['by'] != 'user':
                continue
            pred = list(self.model.get_prediction(utterance['utterance']))
            pred = [p for p in pred if 'None' not in p]
            functions = get_functions_from_utterance(utterance)
            metricContainer.test_utt_accuracy.update(
                1 if len(pred) == len(functions) and len(functions) == len(
                    [k for k, v in pred if k in functions]) else 0, 1)
        metricContainer.test_accuracy.update(int(output), 1)

    self.log("Testing on test_data_oov")
    for data in iterator(tqdm(test_oov_data), 50):
        output = self._evaluate_dialogue(data)
        for _, utterance in data[0].iterrows():
            if utterance['by'] != 'user':
                continue
            pred = list(self.model.get_prediction(utterance['utterance']))
            pred = [p for p in pred if 'None' not in p]
            functions = get_functions_from_utterance(utterance)
            metricContainer.test_oov_utt_accuracy.update(
                1 if len(pred) == len(functions) and len(functions) == len(
                    [k for k, v in pred if k in functions]) else 0, 1)
        metricContainer.test_oov_accuracy.update(int(output), 1)
    return metricContainer
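# The inline exact-match condition above is hard to scan. An equivalent,
# hypothetical helper (`_exact_match` is an illustrative name, not part of
# the codebase): the utterance counts as correct only when prediction and
# ground truth have the same length and every predicted function is expected.
def _exact_match(pred, functions):
    matched = [k for k, v in pred if k in functions]
    return int(len(pred) == len(functions) == len(matched))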
def train_loop(self, input_fn, **kwargs):
    import time  # hoisted out of the epoch loop; ideally a module-level import
    metricContainer = MetricContainer(["loss"])
    epochs = self.current_version.epocs
    self.log("Epochs: {}".format(epochs))
    # Resume support: only run the epochs not already covered by a checkpoint.
    epochs_end = epochs - self.epochs_params - 1
    if epochs_end < 1:
        epochs_end = 1
    self.log("Remaining epochs: {}".format(epochs_end))
    self.model.train()
    for epoch in iterator(
            range(self.epochs_params, self.epochs_params + epochs_end), 1):
        metricContainer.reset_epoch()
        ti = time.time()
        for idx, (name, i, oi) in iterator(enumerate(input_fn), 20):
            # print(ti - time.time())
            i = i.cuda()
            oi = oi.cuda()
            out = self.model(oi)
            loss = self.criterion(out, i)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            metricContainer.loss.update(loss.item(), 1)
            if idx % 100 == 0:
                imshow_tensor(out, i.shape)
                out_string_step = "Epoch: {} Step: {}".format(
                    epoch + 1, idx + 1)
                self.log("----------------------", log_to_file=False)
                self.log(out_string_step, log_to_file=False)
                metricContainer.log_metrics(log_to_file=False)
                metricContainer.reset()
                ti = time.time()
        if epoch % 5 == 0:
            self.save_checkpoint(epoch)
        self.log("=========== Epoch Stats: ===========", log_to_file=False)
        self.log(out_string_step, log_to_file=True)
        metricContainer.log_metrics(metrics=None,
                                    log_to_file=True,
                                    complete_epoch=True,
                                    items_per_row=5,
                                    charachters_per_row=100,
                                    step=epoch + 1)
        self.log("=========== Epoch Ends =============", log_to_file=False)
        metricContainer.reset_epoch()
def _compare_data(gt_data, predicted_data, root_dir=None):
    mc = MetricContainer(["precision", "recall", "dcg", "skipped"])
    # comparator = CCAComparison()
    # sim = comparator.run_comparison(train_data.vectors[:50],
    #                                 ui_layout_vector.vectors[:50])
    # print(sim)
    # if "conv" in str(root_dir):
    #     n_proc = 2
    # else:
    n_proc = 7
    with multiprocessing.Pool(n_proc) as p:
        # if "conv" in str(root_dir):
        #     map_fn = lambda p, i: map(p, i)
        # else:
        map_fn = lambda pr, i: p.imap_unordered(pr, i, chunksize=100)
        print(map_fn, n_proc)  # debug
        for out in tqdm(map_fn(
                _process(predicted_data, gt_data),
                iterator(predicted_data.name_to_idx.items(), None)),
                        total=len(predicted_data.name_to_idx)):
            precision, recall, dcg, skipped = out
            if skipped:
                mc.skipped.update(1)
                continue
            mc.skipped.update(0)
            mc.precision.update(precision)
            mc.recall.update(recall)
            mc.dcg.update(dcg)
            # break
        # break
    mc.log_metrics()
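# Pool.imap_unordered, as used above, yields results in completion order (not
# input order), and `chunksize` batches items to cut inter-process overhead.
# A self-contained sketch of the same pattern; `_square` is a stand-in for the
# real worker, which must be picklable (i.e. defined at module level):
import multiprocessing

def _square(x):
    return x * x

def _example_pool_map(items, n_proc=7):
    with multiprocessing.Pool(n_proc) as pool:
        # Order of results may differ from the order of `items`.
        return list(pool.imap_unordered(_square, items, chunksize=100))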
def __call__(self, file_path):
    self.df = _load_data_to_pd(file_path)
    self.df = function_filter(self.df, self.function_filter)
    dialogue_ids = self.df['dialogue_id'].unique()
    dialogues = []
    for dialogue_id in iterator(tqdm(dialogue_ids, "Loading dialogues: "), 20):
        dialogues.append(self._process_dialogue(dialogue_id))
    return dialogues
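# An alternative worth noting: pandas' groupby visits each dialogue's rows in
# a single pass over the frame instead of filtering per unique id. A sketch
# only; _process_dialogue above takes an id, so this is not a drop-in:
def _example_group_dialogues(df):
    # Each `group` is a sub-DataFrame holding all rows of one dialogue.
    return [group for _, group in df.groupby('dialogue_id')]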
def evaluate_loop(self, input_fn, **kwargs):
    metricContainer = MetricContainer(["loss"])
    self.model.eval()
    for idx, (name, i) in iterator(enumerate(input_fn), 20):
        i = i.cuda()
        out = self.model(i)
        loss = self.criterion(out, i)
        metricContainer.loss.update(loss.item(), 1)
    return metricContainer
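# The loop above still builds autograd graphs during evaluation. Wrapping the
# forward passes in torch.no_grad() avoids that bookkeeping. A minimal sketch
# of the same loop shape; `model`, `criterion`, and `batches` are placeholders.
import torch

def _example_eval(model, criterion, batches):
    model.eval()
    total, count = 0.0, 0
    with torch.no_grad():  # no gradient graphs are built inside this block
        for name, i in batches:
            out = model(i)
            total += criterion(out, i).item()
            count += 1
    return total / max(count, 1)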
def post_execution_hook(self, mode, **kwargs):
    self.model.eval()
    out_location = Path("../exports") / self.current_version.name
    if not out_location.exists():
        out_location.mkdir(parents=True)
    self.copy_related_files(out_location)
    self.log("Exporting data to : " + str(out_location))
    torch.save({"state_dict": self.model.state_dict()},
               out_location / "model.tch")

    self.log("Encoding train data")
    train_encodings = {}
    for name, i, oi in tqdm(
            iterator(
                self.dataloader.get_train_input(mode=ExecutionModeKeys.TEST),
                1),
            total=self.dataloader.get_train_sample_count()):
        train_encodings[name] = self.model.encode(
            oi.cuda()).cpu().detach().numpy()
    with open(out_location / "train_encodings.npy", "wb") as f:
        np.save(f, train_encodings)

    self.log("Encoding test data")
    test_encodings = {}
    for name, i, oi in tqdm(
            iterator(
                self.dataloader.get_test_input(mode=ExecutionModeKeys.TEST),
                1),
            total=self.dataloader.get_test_sample_count()):
        test_encodings[name] = self.model.encode(
            oi.cuda()).cpu().detach().numpy()
    with open(out_location / "test_encodings.npy", "wb") as f:
        np.save(f, test_encodings)
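# Note on reading those files back: np.save pickles a dict into a 0-d object
# array, so loading requires allow_pickle=True plus .item() to unwrap it.
# A sketch, assuming the paths written above:
import numpy as np

def _example_load_encodings(path):
    # Returns the original {name: encoding} dict.
    return np.load(path, allow_pickle=True).item()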
def evaluate_loop(self, input_fn, **kwargs):
    metricContainer = MetricContainer(self.metrics)
    if not self.current_version.generate_images:
        return metricContainer
    self.model.eval()
    self.model.cpu()
    for idx, (name, i, targets) in tqdm(iterator(enumerate(input_fn), 150)):
        i = torch.stack(i)  # .cuda()
        out = self.model(i)  # , targets=targets)
        # print(targets)
        # print(out)
        # imshow_tensor(i, i.shape, k=1)
        visualize_objects(name, i, out, targets, 10,
                          labels_to_class=self.labels_to_class,
                          colors=self.colors)
        # loss = self.criterion(out, i)
        # metricContainer.loss.update(loss.item(), 1)
    return metricContainer
def train_loop(self, input_fn, **kwargs):
    metricContainer = MetricContainer(self.metrics)
    epochs = self.current_version.epocs
    self.log("Epochs: {}".format(epochs))
    epochs_end = epochs - self.epochs_params - 1
    if epochs_end < 1:
        epochs_end = 1
    self.log("Remaining epochs: {}".format(epochs_end), log_to_file=True)
    self.log("Steps per epoch: {}".format(len(input_fn)), log_to_file=True)
    self.model.train()
    for epoch in iterator(
            range(self.epochs_params, self.epochs_params + epochs_end), 1):
        metricContainer.reset_epoch()
        epoch_misses_loss = 0
        epoch_misses_cuda = 0
        for idx, (name, i, targets) in iterator(enumerate(input_fn), 20):
            i = torch.stack(i).cuda()
            try:
                out = self.model(i, targets=targets)
                loss = sum(list(out.values()))
                self.optimizer.zero_grad()
                loss.backward()
                if loss.item() > 50:
                    val = loss.detach().item()
                    self.log("loss over threshold, skipping step: {}".format(val),
                             level=40,
                             log_to_file=True)
                    epoch_misses_loss += 1
                    continue
                self.optimizer.step()
            except RuntimeError as e:
                tb = traceback.format_exc()
                self.log(f"Failed step: RuntimeError {e}",
                         level=40, log_to_file=True)  # error
                self.log(f"traceback: \n {tb}", level=40, log_to_file=True)
                self.log("Error happened with \nnames:{name}".format(name=name),
                         level=40, log_to_file=True)
                self.log("Lengths: {}".format(
                    [len(el["boxes"]) for el in targets]),
                         level=40, log_to_file=True)
                epoch_misses_cuda += 1
                # Skip metric updates for this step: `out` and `loss` are
                # stale (or undefined) after a failed forward/backward pass.
                continue
            metricContainer.update({k: v.item() for k, v in out.items()}, 1)
            metricContainer.loss.update(loss.item(), 1)
            if idx % 50 == 0:
                self.model.eval()
                name = name[:1]
                i = i[:1]
                targets = targets[:1]
                out = self.model(i)
                visualize_objects(name, i, out, targets, 10,
                                  labels_to_class=self.labels_to_class,
                                  colors=self.colors)
                self.model.train()
            if idx % 500 == 0:
                step = epoch * self.dataloader.get_train_sample_count() + idx
                out_string_step = "Epoch: {} Step: {}".format(
                    epoch + 1, idx + 1)
                self.log("----------------------", log_to_file=False)
                self.log(out_string_step, log_to_file=False)
                metricContainer.log_metrics(log_to_file=True, step=step)
                metricContainer.reset()
            if idx % 6000 == 0 and idx > 0:
                self.save_checkpoint(epoch)
        self.save_checkpoint(epoch)
        self.log("=========== Epoch Stats: ===========", log_to_file=False)
        out_string_step = "Epoch: {} Step: {}".format(epoch + 1, idx + 1)
        self.log(out_string_step, log_to_file=True)
        metricContainer.log_metrics(metrics=None,
                                    log_to_file=True,
                                    complete_epoch=True,
                                    items_per_row=5,
                                    charachters_per_row=100,
                                    name_prefix="epoch_",
                                    step=epoch + 1)
        self.log(f"steps missed: {epoch_misses_loss + epoch_misses_cuda}",
                 log_to_file=True)
        self.log(f"    loss threshold: {epoch_misses_loss}", log_to_file=True)
        self.log(f"    cuda memory: {epoch_misses_cuda}", log_to_file=True)
        self.log("=========== Epoch Ends =============", log_to_file=False)
        metricContainer.reset_epoch()
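# The loop above skips the optimizer step whenever the loss crosses a fixed
# threshold. Gradient clipping is a common alternative that keeps every step
# but bounds its magnitude; a sketch, with max_norm=1.0 as an arbitrary
# illustrative value rather than a tuned one:
import torch

def _example_clipped_step(model, optimizer, loss):
    optimizer.zero_grad()
    loss.backward()
    # Rescale gradients in place so their global norm is at most max_norm.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()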