示例#1
0
class CometExperimentLogger(ExperimentLogger):
    """Experiment logger that forwards every record to a Comet experiment.

    Args:
        exp_name: Name given to the base logger and used as the Comet
            ``project_name``.
        online: Truthy to create a live ``Experiment``; otherwise an
            ``OfflineExperiment`` is created.
        **kwargs: Forwarded unchanged to both the base-class initialiser and
            the Comet experiment constructor.
    """

    def __init__(self, exp_name, online=True, **kwargs):
        super(CometExperimentLogger, self).__init__(exp_name, **kwargs)
        # Both experiment flavours are constructed with the same arguments.
        experiment_cls = Experiment if online else OfflineExperiment
        self.comet = experiment_cls(project_name=exp_name, **kwargs)

    def log_metric(self, tag, value, step, **kwargs):
        """Record the scalar ``value`` under the name ``tag`` at ``step``."""
        backend = self.comet
        backend.log_metric(tag, value, step=step, **kwargs)

    def log_image(self, tag, img, step, **kwargs):
        """Upload ``img`` with ``tag`` as its name at ``step``."""
        backend = self.comet
        backend.log_image(img, name=tag, step=step, **kwargs)

    def log_plt(self, tag, plt, step, **kwargs):
        """Upload the figure ``plt`` with ``tag`` as its figure name."""
        backend = self.comet
        backend.log_figure(figure=plt, figure_name=tag, step=step, **kwargs)

    def log_text(self, tag, text, **kwargs):
        """Upload ``text``; ``tag`` is accepted but not forwarded."""
        backend = self.comet
        backend.log_text(text, **kwargs)

    def log_parameters(self, params, **kwargs):
        """Record the hyper-parameters given in ``params``."""
        backend = self.comet
        backend.log_parameters(params, **kwargs)

    def start_epoch(self, **kwargs):
        """Delegate to the base class; ``kwargs`` are accepted but unused."""
        super(CometExperimentLogger, self).start_epoch()

    def end_epoch(self, **kwargs):
        """Close the epoch in the base class, then report it to Comet."""
        super(CometExperimentLogger, self).end_epoch()
        # self.epoch is read after the base-class call, as in the base API.
        finished_epoch = self.epoch
        self.comet.log_epoch_end(finished_epoch, **kwargs)

    def end_experiment(self):
        """Finish the underlying Comet experiment."""
        self.comet.end()
示例#2
0
        verbose = 10,
        n_jobs = 2,
        n_points = 2,
        scoring = 'accuracy',
    )

    # Save optimizer checkpoints during the search so an interrupted run leaves
    # a checkpoint file on disk.
    # NOTE(review): absolute, machine-specific Windows path - consider making
    # it configurable.
    checkpoint_callback = skopt.callbacks.CheckpointSaver(f'D:\\FINKI\\8_dps\\Project\\MODELS\\skopt_checkpoints\\{EXPERIMENT_ID}.pkl')
    hyperparameters_optimizer.fit(X_train, y_train, callback = [checkpoint_callback])
    # Persist the fitted search object under the experiment id.
    skopt.dump(hyperparameters_optimizer, f'saved_models\\{EXPERIMENT_ID}.pkl')

    # Predictions of the best estimator on the test split (not used in the
    # visible portion of this snippet).
    y_pred = hyperparameters_optimizer.best_estimator_.predict(X_test)

    # Create one offline Comet experiment per evaluated parameter setting.
    for i in range(len(hyperparameters_optimizer.cv_results_['params'])):
        # NOTE(review): the API key is hard-coded here; it should be loaded
        # from configuration or the environment and rotated if it is real.
        exp = OfflineExperiment(
            api_key = 'A8Lg71j9LtIrsv0deBA0DVGcR',
            project_name = ALGORITHM,
            workspace = "8_dps",
            auto_output_logging = 'native',
            offline_directory = f'D:\\FINKI\\8_dps\\Project\\MODELS\\comet_ml_offline_experiments\\{EXPERIMENT_ID}'
        )
        # Experiments are numbered from 1 within the experiment id.
        exp.set_name(f'{EXPERIMENT_ID}_{i + 1}')
        exp.add_tags([DS, SEGMENTS_LENGTH, ])
        # Every cv_results_ column is indexed by candidate: the "params" entry
        # is logged as parameters, every other column as a metric.
        for k, v in hyperparameters_optimizer.cv_results_.items():
            if k == "params": exp.log_parameters(dict(v[i]))
            else: exp.log_metric(k, v[i])
        exp.end()

        
        
        
示例#3
0
def main(args):
    """Train FastSpeech2 and optionally report progress to Comet.

    The data loader yields groups of ``hp.batch_size ** 2`` samples; the
    collate function splits each group into several batches, which are
    consumed one by one in the inner loop. Gradients are accumulated over
    ``hp.acc_steps`` steps before the optimizer is updated.

    Comet reporting is selected with the ``USE_COMET`` environment variable:
    ``0`` (default) disables it, ``1`` creates an ``OfflineExperiment`` under
    ``hp.models_path``, ``2`` creates an online ``Experiment``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``restore_step``, ``experiment_name`` and ``m``.

    Raises:
        ValueError: If ``hp.vocoder`` is not ``'melgan'`` or ``'waveglow'``,
            or if ``USE_COMET`` holds a value other than 0, 1 or 2.
    """
    torch.manual_seed(0)

    # Get device
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Get dataset
    dataset = Dataset("train.txt")
    loader = DataLoader(dataset,
                        batch_size=hp.batch_size**2,
                        shuffle=True,
                        collate_fn=dataset.collate_fn,
                        drop_last=True,
                        num_workers=hp.num_workers)

    speaker_encoder = None
    if hp.speaker_encoder_path != "":
        speaker_encoder = load_speaker_encoder(Path(hp.speaker_encoder_path),
                                               device).to(device)
        # Freeze the speaker encoder weights.
        for param in speaker_encoder.parameters():
            param.requires_grad = False
        # NOTE(review): this call used to sit in the ``else`` clause of the
        # ``for`` loop above, which always runs because the loop has no
        # ``break``. Behaviour is unchanged; confirm that train mode (rather
        # than eval mode) is intended for a frozen encoder.
        speaker_encoder.train()

    # Define model
    fastspeech_model = FastSpeech2(speaker_encoder).to(device)
    model = nn.DataParallel(fastspeech_model).to(device)
    print("Model Has Been Defined")
    num_param = utils.get_param_num(model)
    print('Number of FastSpeech2 Parameters:', num_param)

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-4,
                                 betas=hp.betas,
                                 eps=hp.eps,
                                 weight_decay=hp.weight_decay)
    scheduled_optim = ScheduledOptim(optimizer, hp.decoder_hidden,
                                     hp.n_warm_up_step, args.restore_step)
    Loss = FastSpeech2Loss().to(device)
    print("Optimizer and Loss Function Defined.")

    # Load checkpoint if exists
    checkpoint_path = os.path.join(hp.checkpoint_path)
    try:
        checkpoint = torch.load(
            os.path.join(checkpoint_path,
                         'checkpoint_{}.pth.tar'.format(args.restore_step)))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step {}---\n".format(args.restore_step))
    except Exception:
        # Best effort: any failure to restore falls back to a fresh run.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        print("\n---Start New Training---\n")
        os.makedirs(checkpoint_path, exist_ok=True)

    # Load vocoder
    if hp.vocoder == 'melgan':
        vocoder = utils.get_melgan()
        vocoder_infer = utils.melgan_infer
    elif hp.vocoder == 'waveglow':
        vocoder = utils.get_waveglow()
        vocoder_infer = utils.waveglow_infer
    else:
        # Interpolate the name; passing it as a second argument would leave
        # the '%s' placeholder unformatted in the error message.
        raise ValueError("Vocoder '%s' is not supported" % hp.vocoder)

    comet_experiment = None
    use_comet = int(os.getenv("USE_COMET", default=0))
    if use_comet != 0:
        if use_comet == 1:
            offline_dir = os.path.join(hp.models_path, "comet")
            os.makedirs(offline_dir, exist_ok=True)
            comet_experiment = OfflineExperiment(
                project_name="mlp-project",
                workspace="ino-voice",
                offline_directory=offline_dir,
            )
        elif use_comet == 2:
            # NOTE(review): hard-coded API key; it should come from the
            # environment or a config file and be rotated if it is real.
            comet_experiment = Experiment(
                api_key="BtyTwUoagGMh3uN4VZt6gMOn8",
                project_name="mlp-project",
                workspace="ino-voice",
            )
        else:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                "USE_COMET must be 0, 1 or 2, got {}".format(use_comet))

        comet_experiment.set_name(args.experiment_name)
        comet_experiment.log_parameters(hp)
        comet_experiment.log_html(args.m)

    start_time = time.perf_counter()
    first_mel_train_loss, first_postnet_train_loss, first_d_train_loss, first_f_train_loss, first_e_train_loss = \
        None, None, None, None, None

    # Loop-invariant; only used for the progress print-out.
    total_step = hp.epochs * len(loader) * hp.batch_size

    for epoch in range(hp.epochs):
        for i, batchs in enumerate(loader):
            for j, data_of_batch in enumerate(batchs):
                model = model.train()

                current_step = i * hp.batch_size + j + args.restore_step + epoch * len(
                    loader) * hp.batch_size + 1

                # Get Data
                text = torch.from_numpy(
                    data_of_batch["text"]).long().to(device)
                mel_target = torch.from_numpy(
                    data_of_batch["mel_target"]).float().to(device)
                D = torch.from_numpy(data_of_batch["D"]).long().to(device)
                log_D = torch.from_numpy(
                    data_of_batch["log_D"]).float().to(device)
                f0 = torch.from_numpy(data_of_batch["f0"]).float().to(device)
                energy = torch.from_numpy(
                    data_of_batch["energy"]).float().to(device)
                src_len = torch.from_numpy(
                    data_of_batch["src_len"]).long().to(device)
                mel_len = torch.from_numpy(
                    data_of_batch["mel_len"]).long().to(device)
                max_src_len = np.max(data_of_batch["src_len"]).astype(np.int32)
                max_mel_len = np.max(data_of_batch["mel_len"]).astype(np.int32)

                # Forward
                mel_output, mel_postnet_output, log_duration_output, f0_output, energy_output, src_mask, mel_mask, _ = \
                    model(text, src_len, mel_target, mel_len, D, f0, energy, max_src_len, max_mel_len)

                # Cal Loss
                mel_loss, mel_postnet_loss, d_loss, f_loss, e_loss = Loss(
                    log_duration_output, log_D, f0_output, f0, energy_output,
                    energy, mel_output, mel_postnet_output, mel_target,
                    ~src_mask, ~mel_mask)
                total_loss = mel_loss + mel_postnet_loss + d_loss + f_loss + e_loss

                # Set initial values for scaling. Detach them so the stored
                # references do not keep the first step's autograd graph
                # alive, and the scaled values below carry no gradient.
                if first_mel_train_loss is None:
                    first_mel_train_loss = mel_loss.detach()
                    first_postnet_train_loss = mel_postnet_loss.detach()
                    first_d_train_loss = d_loss.detach()
                    first_f_train_loss = f_loss.detach()
                    first_e_train_loss = e_loss.detach()

                # Losses reported relative to their value at the first step.
                mel_l = mel_loss.item() / first_mel_train_loss
                mel_postnet_l = mel_postnet_loss.item(
                ) / first_postnet_train_loss
                d_l = d_loss.item() / first_d_train_loss
                f_l = f_loss.item() / first_f_train_loss
                e_l = e_loss.item() / first_e_train_loss

                # Logger
                if comet_experiment is not None:
                    train_metrics = (
                        ("total_loss",
                         mel_l + mel_postnet_l + d_l + f_l + e_l),
                        ("mel_loss", mel_l),
                        ("mel_postnet_loss", mel_postnet_l),
                        ("duration_loss", d_l),
                        ("f0_loss", f_l),
                        ("energy_loss", e_l),
                    )
                    for metric_name, metric_value in train_metrics:
                        comet_experiment.log_metric(metric_name, metric_value,
                                                    current_step)

                # Backward
                total_loss = total_loss / hp.acc_steps
                total_loss.backward()
                # Keep accumulating gradients until a full accumulation window
                # has been processed; everything below runs only on those
                # boundary steps.
                if current_step % hp.acc_steps != 0:
                    continue

                # Clipping gradients to avoid gradient explosion
                nn.utils.clip_grad_norm_(model.parameters(),
                                         hp.grad_clip_thresh)

                # Update weights
                scheduled_optim.step_and_update_lr()
                scheduled_optim.zero_grad()

                # Print
                if current_step % hp.log_step == 0:
                    now = time.perf_counter()

                    print("\nEpoch [{}/{}], Step [{}/{}]:".format(
                        epoch + 1, hp.epochs, current_step, total_step))
                    print(
                        "Total Loss: {:.4f}, Mel Loss: {:.5f}, Mel PostNet Loss: {:.5f}, Duration Loss: {:.5f}, "
                        "F0 Loss: {:.5f}, Energy Loss: {:.5f};".format(
                            mel_l + mel_postnet_l + d_l + f_l + e_l, mel_l,
                            mel_postnet_l, d_l, f_l, e_l))
                    print("Time Used: {:.3f}s".format(now - start_time))
                    start_time = now

                if current_step % hp.checkpoint == 0:
                    file_path = os.path.join(
                        checkpoint_path,
                        'checkpoint_{}.pth.tar'.format(current_step))
                    torch.save(
                        {
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict()
                        }, file_path)
                    print("saving model at to {}".format(file_path))

                if current_step % hp.synth_step == 0:
                    # Use the first sample of the batch, cut to its true
                    # mel length, for audio and plot previews.
                    length = mel_len[0].item()
                    mel_target_torch = mel_target[
                        0, :length].detach().unsqueeze(0).transpose(1, 2)
                    mel_target = mel_target[
                        0, :length].detach().cpu().transpose(0, 1)
                    mel_torch = mel_output[0, :length].detach().unsqueeze(
                        0).transpose(1, 2)
                    mel = mel_output[0, :length].detach().cpu().transpose(0, 1)
                    mel_postnet_torch = mel_postnet_output[
                        0, :length].detach().unsqueeze(0).transpose(1, 2)
                    mel_postnet = mel_postnet_output[
                        0, :length].detach().cpu().transpose(0, 1)

                    if comet_experiment is not None:
                        comet_experiment.log_audio(
                            audiotools.inv_mel_spec(mel), hp.sampling_rate,
                            "step_{}_griffin_lim.wav".format(current_step))
                        comet_experiment.log_audio(
                            audiotools.inv_mel_spec(mel_postnet),
                            hp.sampling_rate,
                            "step_{}_postnet_griffin_lim.wav".format(
                                current_step))
                        comet_experiment.log_audio(
                            vocoder_infer(mel_torch,
                                          vocoder), hp.sampling_rate,
                            'step_{}_{}.wav'.format(current_step, hp.vocoder))
                        comet_experiment.log_audio(
                            vocoder_infer(mel_postnet_torch, vocoder),
                            hp.sampling_rate, 'step_{}_postnet_{}.wav'.format(
                                current_step, hp.vocoder))
                        comet_experiment.log_audio(
                            vocoder_infer(mel_target_torch,
                                          vocoder), hp.sampling_rate,
                            'step_{}_ground-truth_{}.wav'.format(
                                current_step, hp.vocoder))

                        f0 = f0[0, :length].detach().cpu().numpy()
                        energy = energy[0, :length].detach().cpu().numpy()
                        f0_output = f0_output[
                            0, :length].detach().cpu().numpy()
                        energy_output = energy_output[
                            0, :length].detach().cpu().numpy()

                        utils.plot_data(
                            [(mel_postnet.numpy(), f0_output, energy_output),
                             (mel_target.numpy(), f0, energy)],
                            comet_experiment, [
                                'Synthesized Spectrogram',
                                'Ground-Truth Spectrogram'
                            ])

                if current_step % hp.eval_step == 0:
                    model.eval()
                    with torch.no_grad():
                        if comet_experiment is not None:
                            with comet_experiment.validate():
                                d_l, f_l, e_l, m_l, m_p_l = evaluate(
                                    model, current_step, comet_experiment)
                                t_l = d_l + f_l + e_l + m_l + m_p_l

                                comet_experiment.log_metric(
                                    "total_loss", t_l, current_step)
                                comet_experiment.log_metric(
                                    "mel_loss", m_l, current_step)
                                comet_experiment.log_metric(
                                    "mel_postnet_loss", m_p_l, current_step)
                                comet_experiment.log_metric(
                                    "duration_loss", d_l, current_step)
                                # NOTE(review): "F0_loss" is capitalised
                                # differently from the training metric
                                # "f0_loss"; kept as-is so existing
                                # dashboards keep working.
                                comet_experiment.log_metric(
                                    "F0_loss", f_l, current_step)
                                comet_experiment.log_metric(
                                    "energy_loss", e_l, current_step)
示例#4
0
class Logger:
    """Collect per-round statistics and optionally forward them to Comet.

    Local bookkeeping (megabytes sent, throughput window, station count) is
    always updated; calls on the Comet experiment are made only when
    ``send_logs`` is truthy.

    Args:
        send_logs: Whether to send anything to a Comet experiment.
        tags: Tags added to the experiment, or ``None`` for no tags.
        parameters: Parameters logged to the experiment, or ``None``.
        experiment: Existing experiment to log to. When ``None`` and
            ``send_logs`` is truthy, an ``OfflineExperiment`` is created from
            the keyword arguments stored in the first ``comet_token.json``
            found one directory level below the working directory.
    """

    def __init__(self, send_logs, tags, parameters, experiment=None):
        self.stations = 5
        self.send_logs = send_logs
        # Always define the attribute so it exists even when logging is off.
        self.experiment = experiment
        if self.send_logs and experiment is None:
            json_loc = glob.glob("./**/comet_token.json")[0]
            with open(json_loc, "r") as f:
                kwargs = json.load(f)

            self.experiment = OfflineExperiment(**kwargs)
        self.sent_mb = 0
        self.speed_window = deque(maxlen=100)
        self.step_time = None
        self.current_speed = 0
        # Set by log_episode(); initialised here so the attribute always exists.
        self.last_speed = None
        if self.send_logs:
            if tags is not None:
                self.experiment.add_tags(tags)
            if parameters is not None:
                self.experiment.log_parameters(parameters)

    def begin_logging(self, episode_count, steps_per_ep, sigma, theta, step_time):
        """Store ``step_time`` and log the run-level parameters.

        ``step_time`` is the divisor used by :meth:`log_round` to turn
        megabytes per round into a throughput figure.
        """
        self.step_time = step_time
        if self.send_logs:
            self.experiment.log_parameter("Episode count", episode_count)
            self.experiment.log_parameter("Steps per episode", steps_per_ep)
            self.experiment.log_parameter("theta", theta)
            self.experiment.log_parameter("sigma", sigma)

    def log_round(self, states, reward, cumulative_reward, info, loss, observations, step):
        """Update the running statistics for one round and log them.

        Args:
            states: Values logged as the "Observations" 3D histogram.
            reward: Per-environment rewards; their mean is logged.
            cumulative_reward: Per-environment cumulative rewards; their mean
                is logged.
            info: Sequence of ``"|"``-separated numeric strings. The fields
                are averaged across entries and read, in order, as round
                megabytes, CW, CW for 802.11ax devices, station count and
                fairness index.
            loss: Mapping of loss names to values, passed to ``log_metrics``.
            observations: Iterable of values logged as "Observation <i>".
            step: Step index attached to every logged value.
        """
        # Guarded: without it this call raised AttributeError whenever
        # logging was disabled, because no experiment exists in that case.
        if self.send_logs:
            self.experiment.log_histogram_3d(states, name="Observations", step=step)
        fields = [entry.split("|") for entry in info]
        info = np.mean(np.array(fields, dtype=np.float32), axis=0)
        try:
            round_mb = info[0]
        except Exception:
            # Dump the offending inputs before re-raising to ease debugging.
            print(info)
            print(reward)
            raise
        self.speed_window.append(round_mb)
        self.current_speed = np.mean(np.asarray(self.speed_window)/self.step_time)
        self.sent_mb += round_mb
        CW = info[1]
        CW_ax = info[2]
        self.stations = info[3]
        fairness = info[4]

        if self.send_logs:
            self.experiment.log_metric("Round reward", np.mean(reward), step=step)
            self.experiment.log_metric("Per-ep reward", np.mean(cumulative_reward), step=step)
            self.experiment.log_metric("Megabytes sent", self.sent_mb, step=step)
            self.experiment.log_metric("Round megabytes sent", round_mb, step=step)
            self.experiment.log_metric("Chosen CW for legacy devices", CW, step=step)
            self.experiment.log_metric("Chosen CW for 802.11ax devices", CW_ax, step=step)
            self.experiment.log_metric("Station count", self.stations, step=step)
            self.experiment.log_metric("Current throughput", self.current_speed, step=step)
            self.experiment.log_metric("Fairness index", fairness, step=step)

            for i, obs in enumerate(observations):
                self.experiment.log_metric(f"Observation {i}", obs, step=step)

            self.experiment.log_metrics(loss, step=step)

    def log_episode(self, cumulative_reward, speed, step):
        """Log the episode totals and reset the per-episode counters."""
        if self.send_logs:
            self.experiment.log_metric("Cumulative reward", cumulative_reward, step=step)
            self.experiment.log_metric("Speed", speed, step=step)

        self.sent_mb = 0
        self.last_speed = speed
        self.speed_window = deque(maxlen=100)
        self.current_speed = 0

    def end(self):
        """Finish the Comet experiment when logging is enabled."""
        if self.send_logs:
            self.experiment.end()
示例#5
0
 
 # Iteration counter; the training loop below uses it as the Comet step for
 # "train_loss" and for the periodic save / validate checks.
 iterations = 0
 # Wall-clock reference taken before training starts.
 start = time.time()
 # Initial value of -1 for the best validation loss.
 # NOTE(review): presumably a "no validation yet" sentinel - confirm.
 best_valid_loss = -1
 # Column header printed once before any progress rows.
 header = '  Time Epoch Iteration Progress    (%Epoch)   Loss'
 # Row templates are built by splitting a comma-separated list of format
 # fields and re-joining them with single spaces. The dev template has one
 # more trailing float field than the training template.
 dev_log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f}'.split(','))
 log_template =     ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f}'.split(','))
 print(header)
 
 with experiment.train():
     for epoch in range(config["training"]["epochs"]):
         for batch_idx, (X_batch, y_batch) in enumerate(training_generator):
             X_batch, y_batch = X_batch.to(device), y_batch.to(device)
             X_batch, y_batch = X_batch.permute(1, 0, 2), y_batch.permute(1, 0, 2)
             train_loss = train(X_batch, y_batch, model, opt, criterion, config["clip"])
             experiment.log_metric("train_loss", train_loss, step=iterations)
             # checkpoint model periodically
             if iterations % config["every"]["save"] == 0:
                 snapshot_prefix = os.path.join(config["result_directory"], 'snapshot')
                 snapshot_path = snapshot_prefix + '_loss_{:.6f}_iter_{}_model.pt'.format(train_loss, iterations)
                 torch.save({
                     'model': model.state_dict(),
                     'opt': opt.state_dict(),
                 }, snapshot_path)
                 
                 for f in glob.glob(snapshot_prefix + '*'):
                     if f != snapshot_path:
                         os.remove(f)
             
             # evaluate performance on validation set periodically
             if iterations % config["every"]["validate"] == 0:
示例#6
0
class CometLogger(Logger):
    """Logger that mirrors trajectory and optimisation statistics to Comet."""

    def __init__(
        self,
        batch_size: int,
        snapshot_dir: Optional[str] = None,
        snapshot_mode: str = "last",
        snapshot_gap: int = 1,
        exp_set: Optional[str] = None,
        use_print_exp: bool = False,
        saved_exp: Optional[str] = None,
        **kwargs,
    ):
        """
        :param batch_size: forwarded to the base logger; ``log`` multiplies
            the step by it.
        :param snapshot_dir: base directory for snapshots; the experiment key
            is appended as a sub-directory when it is given.
        :param snapshot_mode: forwarded to the base logger.
        :param snapshot_gap: forwarded to the base logger.
        :param exp_set: when truthy, logged as the ``exp_set`` parameter.
        :param use_print_exp: use a ``PrintExperiment`` instead of comet.
        :param saved_exp: key of a previous experiment to continue.
        :param kwargs: passed to comet's Experiment at init.
        """
        self.experiment = self._build_experiment(use_print_exp, saved_exp, kwargs)

        # Marked complete only by a clean shutdown().
        self.experiment.log_parameter("complete", False)
        if exp_set:
            self.experiment.log_parameter("exp_set", exp_set)
        if snapshot_dir:
            snapshot_dir = Path(snapshot_dir) / self.experiment.get_key()

        # log_traj_window (int): How many trajectories to hold in deque for computing performance statistics.
        self.log_traj_window = 100
        # Counters that only ever grow over the lifetime of the run.
        self._cum_metrics = dict.fromkeys(
            (
                "n_unsafe_actions",
                "constraint_used",
                "cum_completed_trajs",
                "logging_time",
            ),
            0,
        )
        self._new_completed_trajs = 0
        self._last_step = 0
        created_at = time()
        self._start_time = created_at
        self._last_time = created_at
        self._last_snapshot_upload = 0
        # Minimum number of seconds between two snapshot uploads.
        self._snaphot_upload_time = 30 * 60

        super().__init__(batch_size, snapshot_dir, snapshot_mode, snapshot_gap)

    @staticmethod
    def _build_experiment(use_print_exp, saved_exp, exp_kwargs):
        """Return the experiment object selected by the constructor flags."""
        if use_print_exp:
            return PrintExperiment()

        from comet_ml import Experiment, ExistingExperiment, OfflineExperiment

        if saved_exp:
            return ExistingExperiment(previous_experiment=saved_exp, **exp_kwargs)
        try:
            return Experiment(**exp_kwargs)
        except ValueError:  # no API key
            offline_root = Path.home() / "logs"
            offline_root.mkdir(exist_ok=True)
            return OfflineExperiment(offline_directory=str(offline_root))

    def log_fast(
        self,
        step: int,
        traj_infos: Sequence[Dict[str, float]],
        opt_info: Optional[Tuple[Sequence[float], ...]] = None,
        test: bool = False,
    ) -> None:
        """Fold ``traj_infos`` into the cumulative counters; sends nothing."""
        if not traj_infos:
            return
        began = time()

        n_trajs = len(traj_infos)
        self._new_completed_trajs += n_trajs
        totals = self._cum_metrics
        totals["cum_completed_trajs"] += n_trajs
        # TODO: do we need to support sum(t[k]) if key in k?
        # without that, this doesn't include anything from extra eval samplers
        for name in totals:
            if name != "cum_completed_trajs":
                totals[name] += sum(info.get(name, 0) for info in traj_infos)
        totals["logging_time"] += time() - began

    def log(
        self,
        step: int,
        traj_infos: Sequence[Dict[str, float]],
        opt_info: Optional[Tuple[Sequence[float], ...]] = None,
        test: bool = False,
    ):
        """Update the counters, then send averaged metrics to the experiment.

        When ``test`` is true the per-call metrics are logged inside the
        experiment's test context and the throughput bookkeeping is skipped.
        """
        self.log_fast(step, traj_infos, opt_info, test)
        began = time()
        scope = self.experiment.test() if test else nullcontext()
        with scope:
            step *= self.batch_size
            if opt_info is not None:
                # grad norm is left on the GPU for some reason
                # https://github.com/astooke/rlpyt/issues/163
                opt_means = {}
                for name, values in opt_info._asdict().items():
                    if name != "gradNorm":
                        opt_means[name] = np.mean(values)
                self.experiment.log_metrics(opt_means, step=step)

            if traj_infos:
                # Average every key that is not tracked cumulatively.
                agg_vals = {
                    key: sum(info[key] for info in traj_infos) / len(traj_infos)
                    for key in traj_infos[0].keys()
                    if key not in self._cum_metrics
                }
                self.experiment.log_metrics(agg_vals, step=step)

            if not test:
                now = time()
                throughput = (step - self._last_step) / (now - self._last_time)
                self.experiment.log_metrics(
                    {
                        "new_completed_trajs": self._new_completed_trajs,
                        "steps_per_second": throughput,
                    },
                    step=step,
                )
                self._last_time = now
                self._last_step = step
                self._new_completed_trajs = 0

        self.experiment.log_metrics(self._cum_metrics, step=step)
        self._cum_metrics["logging_time"] += time() - began

    def log_metric(self, name, val):
        """Log one metric without an explicit step."""
        self.experiment.log_metric(name, val)

    def log_parameters(self, parameters):
        """Log a collection of parameters."""
        self.experiment.log_parameters(parameters)

    def log_config(self, config):
        """Log ``config`` as a JSON string under the ``config`` parameter."""
        serialised = json.dumps(convert_dict(config))
        self.experiment.log_parameter("config", serialised)

    def upload_snapshot(self):
        """Upload the previous snapshot file if a snapshot dir is set."""
        if not self.snapshot_dir:
            return
        self.experiment.log_asset(self._previous_snapshot_fname)

    def save_itr_params(
        self, step: int, params: Dict[str, Any], metric: Optional[float] = None
    ) -> None:
        """Save through the base class; upload once the interval has passed."""
        super().save_itr_params(step, params, metric)
        now = time()
        since_last_upload = now - self._last_snapshot_upload
        if since_last_upload <= self._snaphot_upload_time:
            return
        self._last_snapshot_upload = now
        self.upload_snapshot()

    def shutdown(self, error: bool = False) -> None:
        """End the experiment; mark it complete only on a clean shutdown."""
        finished_cleanly = not error
        if finished_cleanly:
            self.upload_snapshot()
            self.experiment.log_parameter("complete", True)
        self.experiment.end()
示例#7
0
            labels = Variable(labels)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = rnn(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Compute train accuracy
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += float((predicted == labels.data).sum())

            # Log accuracy to Comet.ml
            experiment.log_metric("accuracy", correct / total, step=step)
            step += 1

            if (i + 1) % 100 == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' %
                      (epoch + 1, hyper_params['num_epochs'], i + 1,
                       len(train_dataset) // hyper_params['batch_size'],
                       loss.data.item()))
        experiment.log_epoch_end(epoch)
with experiment.test():
    # Test the Model
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = Variable(
            images.view(-1, hyper_params['sequence_length'],