Example #1
import networkx as nx


def name_seq_pairs_to_dbg(pairs, k, tqdm=None, ignore_short=False):
    """Build a De Bruijn graph from sequences.

    pairs is an iterable of (name, seq) tuples (Iterable[Tuple[str, str]]);
    name may be None.
    """
    dbg = nx.DiGraph()
    for name, seq in pairs:
        if tqdm is not None:
            tqdm.update()
        # Optionally skip sequences shorter than the k-mer size.
        if ignore_short and len(seq) < k:
            continue
        update_debruijn_graph(dbg, k, seq, name)
    return dbg
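
The helper `update_debruijn_graph` is not shown on this page; below is a minimal hypothetical sketch of it, assuming nodes are (k-1)-mers and each k-mer contributes one edge, followed by a usage example.

def update_debruijn_graph(dbg, k, seq, name=None):
    # Hypothetical sketch: each k-mer in seq adds an edge from its (k-1)-mer
    # prefix to its (k-1)-mer suffix, storing the k-mer and source name.
    for i in range(len(seq) - k + 1):
        kmer = seq[i:i + k]
        dbg.add_edge(kmer[:-1], kmer[1:], kmer=kmer, name=name)

dbg = name_seq_pairs_to_dbg([("read1", "ACGTAC")], k=3)
print(sorted(dbg.edges))  # [('AC', 'CG'), ('CG', 'GT'), ('GT', 'TA'), ('TA', 'AC')]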
Example #2
def _tqdm_update(tqdm, postfix=None):
    """Update the tqdm bar if one was given; otherwise do nothing."""
    if tqdm is None:
        return
    tqdm.update(1)
    if postfix is None:
        return
    if isinstance(postfix, dict):
        # Truncate each value so the postfix stays on one line.
        postfix = {k: _truncate(v, justify="l") for k, v in postfix.items()}
        tqdm.set_postfix(postfix)
        return
    tqdm.set_postfix_str(_truncate(postfix))
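
The `_truncate` helper is also not shown; here is a hypothetical stand-in plus a usage example, assuming it merely shortens long values so the postfix fits on one line.

from tqdm import tqdm as progress_bar

def _truncate(value, width=20, justify="r"):
    # Hypothetical stand-in: cut long values, pad left-justified ones.
    text = str(value)
    if len(text) > width:
        text = text[:width - 1] + "…"
    return text.ljust(width) if justify == "l" else text

bar = progress_bar(total=100)
for step in range(100):
    _tqdm_update(bar, postfix={"loss": 0.123456789})
bar.close()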
Example #3
    def evaluate_with_error_rates(self, iterator, tqdm):
        all_orig = []
        all_predicted = []
        results = {}
        self.diacritizer.set_model(self.model)
        evaluated_batches = 0
        tqdm.set_description(f"Calculating DER/WER {self.global_step}: ")
        for batch in iterator:
            # Cap evaluation at the configured number of batches.
            if evaluated_batches >= int(self.config["error_rates_n_batches"]):
                break

            predicted = self.diacritizer.diacritize_batch(batch)
            all_predicted += predicted
            all_orig += batch["original"]
            evaluated_batches += 1
            tqdm.update()

        summary_texts = []
        orig_path = os.path.join(self.config_manager.prediction_dir,
                                 "original.txt")
        predicted_path = os.path.join(self.config_manager.prediction_dir,
                                      "predicted.txt")

        with open(orig_path, "w", encoding="utf8") as file:
            for sentence in all_orig:
                file.write(f"{sentence}\n")

        with open(predicted_path, "w", encoding="utf8") as file:
            for sentence in all_predicted:
                file.write(f"{sentence}\n")

        for i in range(int(self.config["n_predicted_text_tensorboard"])):
            # Stop before indexing past the available predictions.
            if i >= len(all_predicted):
                break

            summary_texts.append(
                (f"eval-text/{i}", f"{all_orig[i]} |-> {all_predicted[i]}"))

        results["DER"] = der.calculate_der_from_path(orig_path, predicted_path)
        results["DER*"] = der.calculate_der_from_path(orig_path,
                                                      predicted_path,
                                                      case_ending=False)
        results["WER"] = wer.calculate_wer_from_path(orig_path, predicted_path)
        results["WER*"] = wer.calculate_wer_from_path(orig_path,
                                                      predicted_path,
                                                      case_ending=False)
        tqdm.reset()
        return results, summary_texts
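
A standalone sketch of the capped-evaluation pattern above, with a hypothetical predict() standing in for the diacritizer:

from tqdm import tqdm

def evaluate_capped(iterator, n_batches, predict, bar):
    predictions = []
    for i, batch in enumerate(iterator):
        if i >= n_batches:  # stop after the configured number of batches
            break
        predictions.extend(predict(batch))
        bar.update()
    bar.reset()  # the same bar is reused on the next evaluation pass
    return predictions

bar = tqdm(total=10, desc="Calculating DER/WER")
results = evaluate_capped(([x] for x in range(100)), 10, lambda b: b, bar)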
Example #4
    def evaluate(self, iterator, tqdm, use_target=True):
        epoch_loss = 0
        epoch_acc = 0
        self.model.eval()
        tqdm.set_description(f"Eval: {self.global_step}")
        with torch.no_grad():
            for batch_inputs in iterator:
                batch_inputs["src"] = batch_inputs["src"].to(self.device)
                batch_inputs["lengths"] = batch_inputs["lengths"].to("cpu")
                batch_inputs["target"] = batch_inputs["target"].to(self.device)
                # With use_target=False the model decodes without teacher
                # forcing, but the gold target is still needed for the loss.
                model_target = batch_inputs["target"] if use_target else None

                outputs = self.model(
                    src=batch_inputs["src"],
                    target=model_target,
                    lengths=batch_inputs["lengths"],
                )

                # Flatten (batch, seq, classes) -> (batch * seq, classes).
                predictions = outputs["diacritics"]
                predictions = predictions.view(-1, predictions.shape[-1])
                targets = batch_inputs["target"].view(-1)
                loss = self.criterion(predictions, targets)
                acc = categorical_accuracy(predictions, targets, self.pad_idx)
                epoch_loss += loss.item()
                epoch_acc += acc.item()
                tqdm.update()

        tqdm.reset()
        return epoch_loss / len(iterator), epoch_acc / len(iterator)
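
The `categorical_accuracy` helper the example assumes is not shown; a hypothetical sketch would compute the fraction of non-pad positions whose argmax prediction matches the target:

import torch

def categorical_accuracy(predictions, targets, pad_idx):
    # Hypothetical sketch. predictions: (N, n_classes); targets: (N,).
    top = predictions.argmax(dim=-1)   # predicted class per position
    mask = targets != pad_idx          # ignore padding positions
    correct = (top[mask] == targets[mask]).sum()
    return correct.float() / mask.sum().clamp(min=1)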
Example #5
    def tqdm_update(self, count=None, mode=None):
        # Pick the default bar, or the secondary bar when a mode is given.
        tqdm = self.tqdm if mode is None else self.tqdm_w
        if self.show_progress:
            if count:
                # Caller supplied an explicit increment.
                tqdm.update(count)
            elif tqdm.n + self.chunksize < self.count:
                # Advance in coarse chunks while far from the total.
                tqdm.update(self.chunksize)
            else:
                # Close the remaining gap exactly.
                tqdm.update(self.count - tqdm.n)
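
A standalone sketch of the chunked-update idea (counts are hypothetical): advance the bar in coarse chunks while far from the total, then close the remaining gap exactly.

from tqdm import tqdm

total, chunksize = 1000, 64
bar = tqdm(total=total)
done = 0
while done < total:
    done = min(done + 37, total)  # work arrives in uneven pieces
    while bar.n + chunksize < done:
        bar.update(chunksize)
bar.update(total - bar.n)  # final top-up so the bar reaches 100%
bar.close()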
Example #6
    def update(self, n):
        # n is the absolute count processed so far; convert it to the
        # increment the base class expects.
        if not self.disable:
            tqdm.update(self, n - self.n)
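
A minimal subclass sketch around this override (class name hypothetical): callers pass absolute progress instead of deltas.

from tqdm import tqdm

class AbsoluteTqdm(tqdm):
    def update(self, n):
        # n is an absolute count, not a delta.
        if not self.disable:
            tqdm.update(self, n - self.n)

bar = AbsoluteTqdm(total=100)
for done in (10, 45, 100):  # absolute progress values
    bar.update(done)
bar.close()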
Example #7
    def run(self):
        scaler = torch.cuda.amp.GradScaler()
        train_iterator, _, validation_iterator = load_iterators(
            self.config_manager)
        print("data loaded")
        print("----------------------------------------------------------")
        tqdm_eval = trange(0, len(validation_iterator), leave=True)
        tqdm_error_rates = trange(0, len(validation_iterator), leave=True)
        tqdm_eval.set_description("Eval")
        tqdm_error_rates.set_description("WER/DER : ")
        tqdm = trange(self.global_step,
                      self.config["max_steps"] + 1,
                      leave=True)

        for batch_inputs in repeater(train_iterator):
            tqdm.set_description(f"Global Step {self.global_step}")
            if self.config["use_decay"]:
                self.lr = self.adjust_learning_rate(
                    self.optimizer, global_step=self.global_step)
            self.optimizer.zero_grad()
            if self.device == "cuda" and self.config["use_mixed_precision"]:
                # Only the forward pass runs under autocast; backward,
                # unscaling, clipping and the optimizer step happen outside.
                with autocast():
                    step_results = self.run_one_step(batch_inputs)
                scaler.scale(step_results["loss"]).backward()
                scaler.unscale_(self.optimizer)
                if self.config.get("CLIP"):
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   self.config["CLIP"])
                scaler.step(self.optimizer)
                scaler.update()
            else:
                step_results = self.run_one_step(batch_inputs)

                loss = step_results["loss"]
                loss.backward()
                if self.config.get("CLIP"):
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   self.config["CLIP"])
                self.optimizer.step()

            self.losses.append(step_results["loss"].item())

            self.print_losses(step_results, tqdm)

            self.summary_manager.add_scalar("meta/learning_rate",
                                            self.lr,
                                            global_step=self.global_step)

            if self.global_step % self.config["model_save_frequency"] == 0:
                torch.save(
                    {
                        "global_step": self.global_step,
                        "model_state_dict": self.model.state_dict(),
                        "optimizer_state_dict": self.optimizer.state_dict(),
                    },
                    os.path.join(
                        self.config_manager.models_dir,
                        f"{self.global_step}-snapshot.pt",
                    ),
                )

            if self.global_step % self.config["evaluate_frequency"] == 0:
                loss, acc = self.evaluate(validation_iterator, tqdm_eval)
                self.summary_manager.add_scalar("evaluate/loss",
                                                loss,
                                                global_step=self.global_step)
                self.summary_manager.add_scalar("evaluate/acc",
                                                acc,
                                                global_step=self.global_step)
                tqdm.display(
                    f"Evaluate {self.global_step}: accuracy, {acc}, loss: {loss}",
                    pos=8)
                self.model.train()

            if (self.global_step %
                    self.config["evaluate_with_error_rates_frequency"] == 0):
                error_rates, summary_texts = self.evaluate_with_error_rates(
                    validation_iterator, tqdm_error_rates)
                if error_rates:
                    WER = error_rates["WER"]
                    DER = error_rates["DER"]
                    DER1 = error_rates["DER*"]
                    WER1 = error_rates["WER*"]

                    self.summary_manager.add_scalar(
                        "error_rates/WER",
                        WER / 100,
                        global_step=self.global_step,
                    )
                    self.summary_manager.add_scalar(
                        "error_rates/DER",
                        DER / 100,
                        global_step=self.global_step,
                    )
                    self.summary_manager.add_scalar(
                        "error_rates/DER*",
                        DER1 / 100,
                        global_step=self.global_step,
                    )
                    self.summary_manager.add_scalar(
                        "error_rates/WER*",
                        WER1 / 100,
                        global_step=self.global_step,
                    )

                    error_rates = f"DER: {DER}, WER: {WER}, DER*: {DER1}, WER*: {WER1}"
                    tqdm.display(f"WER/DER {self.global_step}: {error_rates}",
                                 pos=9)

                    for tag, text in summary_texts:
                        self.summary_manager.add_text(tag, text)

                self.model.train()

            if self.global_step % self.config["train_plotting_frequency"] == 0:
                self.plot_attention(step_results)

            self.report(step_results, tqdm)

            self.global_step += 1
            if self.global_step > self.config["max_steps"]:
                print("Training Done.")
                return

            tqdm.update()
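
A standalone sketch of the mixed-precision step used in run() (model, optimizer and criterion are hypothetical): only the forward pass runs under autocast, while backward, unscaling, clipping, step and update happen outside it.

import torch

def amp_train_step(model, optimizer, criterion, scaler, src, target, clip=None):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = criterion(model(src), target)
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)  # so clipping sees true gradient norms
    if clip:
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
    scaler.step(optimizer)      # skips the step if gradients overflowed
    scaler.update()
    return loss.item()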