Пример #1
0
def main(args):
    """Run context selection, then question answering, and save the answers.

    Calls ``set_seed(args.seed)`` first. The context-selection predictions
    are fed into the question-answering stage; every QA prediction is passed
    through ``postprocess`` and collected into an ``id -> answer`` mapping
    that is dumped to ``args.predict_json``. If ``args.tmp_dir`` is truthy,
    the directory is created and the raw predictions of both stages are
    pickled into it.

    Args:
        args: Parsed arguments. This function reads ``seed``,
            ``context_config_json``, ``qa_config_json``, ``predict_json``
            and ``tmp_dir``, and forwards ``args`` itself to both stages.
    """
    set_seed(args.seed)

    # Stage 1: pick the relevant paragraphs.
    context_seconds, context_predictions = context_selection(
        args, args.context_config_json)
    context_summary = (
        f"Finished context selection, get {len(context_predictions)} paragraphs"
        f"({context_seconds:.2f}s elapsed)")
    logger.info(context_summary)

    # Stage 2: extract answer spans from the selected paragraphs.
    qa_seconds, qa_predictions = question_answering(
        args, args.qa_config_json, context_predictions)
    qa_summary = (
        f"Finished question_answering, get {len(qa_predictions)} answer spans"
        f"({qa_seconds:.2f}s elapsed)")
    logger.info(qa_summary)

    # Later entries overwrite earlier ones that share the same id.
    predictions = {}
    for raw_prediction in qa_predictions:
        processed = postprocess(raw_prediction)
        key = processed["id"]
        predictions[key] = processed["answer"]
    json_dump(predictions, args.predict_json, ensure_ascii=False)

    if not args.tmp_dir:
        return

    # Keep the intermediate outputs of both stages for later inspection.
    tmp_dir = args.tmp_dir
    tmp_dir.mkdir(parents=True, exist_ok=True)
    pickle_dump(context_predictions, tmp_dir / "context_predictions.pkl")
    pickle_dump(qa_predictions, tmp_dir / "qa_predictions.pkl")
Пример #2
0
def saveReconstructions(reconstructions, fn):
    """
    Convert the reconstructions to a JSON object and write it to ``fn``.

    The target file is opened in text mode (the original author's note says
    this is for Python 3). An info message is logged once the file is open
    and another after the JSON has been written.

    :type reconstructions: list[scripts.myopensfm.types.Reconstruction]
    :type fn: str
    """
    with open(fn, 'w') as out_file:  # text mode, for python3
        log = mylogger.logger
        log.info("saving " + fn)
        io.json_dump(io.reconstructions_to_json(reconstructions), out_file)
        log.info("Done")
Пример #3
0
def main(args):
    """Shuffle ``train.json`` and split it into train and validation files.

    Calls ``set_seed(args.seed)``, loads ``train.json`` from
    ``args.dataset_dir``, shuffles the loaded data in place, and writes the
    first ``int(args.train_ratio * len(data))`` items to
    ``train_splitted.json`` and the remaining items to
    ``val_splitted.json`` in the same directory.

    Args:
        args: Parsed arguments. This function reads ``seed``,
            ``dataset_dir`` and ``train_ratio``.
    """
    set_seed(args.seed)

    dataset_dir = args.dataset_dir
    source_path = dataset_dir / "train.json"
    logger.info(f"Loading training data from {source_path}...")
    all_data = json_load(source_path)

    logger.info("Random shuffling the data...")
    random.shuffle(all_data)

    total = len(all_data)
    train_size = int(args.train_ratio * total)
    val_size = total - train_size
    logger.info(f"Splitting the dataset into [{train_size}, {val_size}] sizes")

    # The head of the shuffled list is the training split, the tail is
    # the validation split.
    outputs = (
        (all_data[:train_size], "train_splitted.json"),
        (all_data[train_size:], "val_splitted.json"),
    )
    for subset, filename in outputs:
        json_dump(subset, dataset_dir / filename)
Пример #4
0
    def train(self, train_dataloader, val_dataloader):
        """Run the training loop and persist the weights and a training log.

        Iterates over epochs from ``self.cur_epoch`` through
        ``self.total_epochs`` (inclusive). Each epoch runs ``run_epoch`` once
        on the training loader (``train=True``) and once on the validation
        loader, logging time, loss and formatted metrics for both. Every
        ``self.checkpoint_freq`` epochs a checkpoint is saved on a
        best-effort basis: an ``OSError`` is logged as a warning and training
        continues. After the loop the model weights and the per-epoch log are
        written into ``self.checkpoint_dir``.

        Args:
            train_dataloader: Passed to ``run_epoch`` for the training pass.
            val_dataloader: Passed to ``run_epoch`` for the validation pass.
        """
        logger.info(f"Training model for {self.total_epochs} epochs...")
        history = []
        for epoch in range(self.cur_epoch, self.total_epochs + 1):
            self.cur_epoch = epoch
            logger.info(f"Epoch {epoch:2d} / {self.total_epochs:2d}")

            # Training pass.
            train_time, train_outcome = self.run_epoch(
                train_dataloader, split="train", train=True, epoch=epoch)
            train_loss, train_metrics = train_outcome
            logger.info(
                f"Train | {train_time:8.3f}s | loss: {train_loss:.3f} | "
                f"{self.format_metrics(train_metrics)}")

            # Validation pass.
            val_time, val_outcome = self.run_epoch(
                val_dataloader, split="val", epoch=epoch)
            val_loss, val_metrics = val_outcome
            logger.info(
                f"Val   | {val_time:8.3f}s | loss: {val_loss:.3f} | "
                f"{self.format_metrics(val_metrics)}")

            if epoch % self.checkpoint_freq == 0:
                # Best effort only: per the original note, this works around
                # the meow1 / meow2 disk-full issue.
                try:
                    checkpoint_name = f"checkpoint_{epoch:03d}.pt"
                    self.save_checkpoint(self.checkpoint_dir / checkpoint_name)
                except OSError as e:
                    logger.warning(
                        f"Trying to save a checkpoint, but '{e}' occured")

            history.append(dict(
                epoch=epoch,
                train_time=train_time,
                train_loss=train_loss,
                train_metrics=train_metrics,
                val_time=val_time,
                val_loss=val_loss,
                val_metrics=val_metrics,
            ))

        output_dir = self.checkpoint_dir
        self.model.save_weights(output_dir / "model_weights.pt")
        json_dump(history, output_dir / "train_log.json")
Пример #5
0
    def __init__(
        self,
        contexts: List[str],
        data: List[dict],
        tokenizer: Optional[BertTokenizer] = None,
        test: bool = False,
        include_nonrelevant=0,
        split_name: str = "no_name",
        cache_dir: Optional[Path] = None,
        skip_preprocess: Optional[bool] = False,
    ):
        """Store the raw inputs and build (or load) the preprocessed dataset.

        When both ``cache_dir`` and ``split_name`` are truthy, the
        preprocessed data is cached in
        ``cache_dir / "_{split_name}_preprocessed_{include_nonrelevant}.json"``:
        an existing cache file is loaded instead of re-running
        ``preprocess_dataset``; otherwise the freshly preprocessed data is
        written there.

        Args:
            contexts: Stored as ``self._contexts`` and forwarded to
                ``preprocess_dataset``.
            data: Stored as ``self._raw_data`` and forwarded to
                ``preprocess_dataset``.
            tokenizer: Stored as ``self.tokenizer`` and forwarded to
                ``preprocess_dataset``.
            test: Stored as ``self.test`` and forwarded to
                ``preprocess_dataset``.
            include_nonrelevant: Forwarded to ``preprocess_dataset`` and
                embedded in the cache file name; its meaning is defined by
                ``preprocess_dataset``.
            split_name: Stored as ``self.split_name`` and embedded in the
                cache file name.
            cache_dir: Directory holding the cache file; ``None`` disables
                caching.
            skip_preprocess: When truthy, return right after storing the raw
                fields. ``self.data`` is NOT set on this path.
        """
        super().__init__()
        self._contexts = contexts
        self._raw_data = data
        self.tokenizer = tokenizer
        self.test = test
        self.split_name = split_name

        if skip_preprocess:
            return

        # Caching is only enabled when both a directory and a split name
        # are available.
        cache_path = None
        if cache_dir and split_name:
            cache_name = (
                f"_{split_name}_preprocessed_{include_nonrelevant}.json")
            cache_path = cache_dir / cache_name

        if cache_path and cache_path.is_file():
            logger.info(
                f"Loading cached preprocessed dataset from {cache_path}...")
            self.data = json_load(cache_path)
            return

        self.data = self.preprocess_dataset(
            self.tokenizer,
            contexts,
            data,
            include_nonrelevant=include_nonrelevant,
            test=self.test,
        )
        if cache_path:
            # Make sure the cache directory exists so that a missing
            # directory cannot fail the dump after preprocessing is done.
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            logger.info(
                f"Saving cached preprocessed dataset to {cache_path}...")
            json_dump(self.data, cache_path)
Пример #6
0
    def train(self, train_dataloader, val_dataloader):
        """Train and validate once per epoch, then save weights and the log.

        Epochs run from ``self.cur_epoch`` through ``self.total_epochs``
        (inclusive), and ``self.cur_epoch`` is updated at the start of each
        one. Both passes go through ``run_epoch``; the training pass is
        called with ``train=True``. A checkpoint is saved whenever the epoch
        number is a multiple of ``self.checkpoint_freq``; errors raised by
        ``save_checkpoint`` are not caught here. After the last epoch the
        model weights and the list of per-epoch records are written into
        ``self.checkpoint_dir``.

        Args:
            train_dataloader: Passed to ``run_epoch`` for the training pass.
            val_dataloader: Passed to ``run_epoch`` for the validation pass.
        """
        logger.info(f"Training model for {self.total_epochs} epochs...")
        epoch_records = []
        for epoch in range(self.cur_epoch, self.total_epochs + 1):
            self.cur_epoch = epoch
            logger.info(f"Epoch {epoch:2d} / {self.total_epochs:2d}")

            # The record is filled in the same key order as it is dumped.
            record = {"epoch": epoch}

            elapsed, (loss, metrics) = self.run_epoch(
                train_dataloader, split="train", train=True, epoch=epoch
            )
            logger.info(
                f"Train | {elapsed:7.3f}s | loss: {loss:.3f} | "
                f"{self.format_metrics(metrics)}"
            )
            record.update(
                train_time=elapsed, train_loss=loss, train_metrics=metrics
            )

            elapsed, (loss, metrics) = self.run_epoch(
                val_dataloader, split="val", epoch=epoch
            )
            logger.info(
                f"Val   | {elapsed:7.3f}s | loss: {loss:.3f} | "
                f"{self.format_metrics(metrics)}"
            )
            record.update(val_time=elapsed, val_loss=loss, val_metrics=metrics)

            is_checkpoint_epoch = epoch % self.checkpoint_freq == 0
            if is_checkpoint_epoch:
                checkpoint_path = self.checkpoint_dir / f"checkpoint_{epoch:03d}.pt"
                self.save_checkpoint(checkpoint_path)

            epoch_records.append(record)

        self.model.save_weights(self.checkpoint_dir / "model_weights.pt")
        json_dump(epoch_records, self.checkpoint_dir / "train_log.json")