Example #1
def get_subpart_data(df, subpart_data):
    column = "filename"
    if subpart_data <= len(df[column].unique()):
        filenames = df[column].drop_duplicates().sample(subpart_data,
                                                        random_state=10)
        df = df[df[column].isin(filenames)].reset_index(drop=True)
        LOG.debug(
            "Taking subpart of the data, len : {}, df_len: {}".format(
                subpart_data, len(df)))
    return df
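A quick usage sketch of get_subpart_data (the toy DataFrame and logger setup below are illustrative, not from the project):

import logging

import pandas as pd

logging.basicConfig(level=logging.DEBUG)
LOG = logging.getLogger(__name__)  # the snippet above expects a module-level LOG

df = pd.DataFrame({
    "filename": ["a.wav", "a.wav", "b.wav", "c.wav"],
    "event_label": ["Speech", "Dog", "Speech", "Cat"],
})

# Keep the rows of 2 randomly chosen files (seeded via random_state=10 above).
sub_df = get_subpart_data(df, subpart_data=2)
print(sub_df.filename.unique())  # 2 distinct filenames remain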
Example #2
    def means(self, dataset):
        """
       Splits a dataset in to train test validation.
       :param dataset: dataset, from DataLoad class, each sample is an (X, y) tuple.
       """
        LOG.info('computing mean')
        start = time.time()

        shape = None

        counter = 0
        for sample in dataset:
            if isinstance(sample, (tuple, list)) and len(sample) == 2:
                batch_X, _ = sample
            else:
                batch_X = sample
            if isinstance(batch_X, torch.Tensor):
                batch_X_arr = batch_X.numpy()
            else:
                batch_X_arr = batch_X
            data_square = batch_X_arr ** 2
            counter += 1

            if shape is None:
                shape = batch_X_arr.shape
            else:
                if batch_X_arr.shape != shape:
                    raise NotImplementedError("Adding data with a different shape to the mean computation is not supported yet")

            # assume first item will have shape info
            if self.mean_ is None:
                self.mean_ = self.mean(batch_X_arr, axis=-1)
            else:
                self.mean_ += self.mean(batch_X_arr, axis=-1)

            if self.mean_of_square_ is None:
                self.mean_of_square_ = self.mean(data_square, axis=-1)
            else:
                self.mean_of_square_ += self.mean(data_square, axis=-1)

        self.mean_ /= counter
        self.mean_of_square_ /= counter

        # To be used if data have different shapes, but the iteration needs to stop earlier.
        # rest = len(dataset) - i
        # if rest != 0:
        #     weight = rest / float(i + rest)
        #     X, y = dataset[-1]
        #     data_square = X ** 2
        #     mean = mean * (1 - weight) + self.mean(X, axis=-1) * weight
        #     mean_of_square = mean_of_square * (1 - weight) + self.mean(data_square, axis=-1) * weight

        LOG.debug('time to compute means: ' + str(time.time() - start))
        return self
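For intuition: because every batch must share one shape, the running statistic above is just the average of per-batch means. A minimal NumPy check of that equivalence:

import numpy as np

batches = [np.random.rand(4, 8) for _ in range(3)]  # 3 batches, identical shape

# What means() accumulates: per-batch means over the last axis, summed, then / counter
running = sum(b.mean(axis=-1) for b in batches) / len(batches)

# The same statistic computed in one shot
direct = np.stack([b.mean(axis=-1) for b in batches]).mean(axis=0)
assert np.allclose(running, direct)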
Example #3
def train(train_loader, model, optimizer, epoch, weak_mask=None, strong_mask=None):
    class_criterion = nn.BCELoss()
    [class_criterion] = to_cuda_if_available([class_criterion])

    meters = AverageMeterSet()
    meters.update('lr', optimizer.param_groups[0]['lr'])

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    for i, (batch_input, target) in enumerate(train_loader):
        [batch_input, target] = to_cuda_if_available([batch_input, target])
        LOG.debug(batch_input.mean())

        strong_pred, weak_pred = model(batch_input)
        loss = 0
        if weak_mask is not None:
            # Weak BCE Loss
            # Trick to not take unlabeled data
            # Todo figure out another way
            target_weak = target.max(-2)[0]
            weak_class_loss = class_criterion(weak_pred[weak_mask], target_weak[weak_mask])
            if i == 1:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug(weak_class_loss)
            meters.update('Weak loss', weak_class_loss.item())

            loss += weak_class_loss

        if strong_mask is not None:
            # Strong BCE loss
            strong_class_loss = class_criterion(strong_pred[strong_mask], target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())

            loss += strong_class_loss

        assert not (np.isnan(loss.item()) or loss.item() > 1e5), 'Loss explosion: {}'.format(loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_time = time.time() - start

    LOG.info(
        'Epoch: {}\t'
        'Time {:.2f}\t'
        '{meters}'.format(
            epoch, epoch_time, meters=meters))
Example #4
def loop_batches_acc_grad(indexes, dataset, model_triplet, semi_hard_input=None, semi_hard_embed=None, i=0):
    out = []
    out_pos = []
    out_neg = []

    for j, ind in enumerate(indexes):
        samples = dataset[ind]

        inputs, inputs_pos, inputs_neg, pred_labels = samples
        inputs, inputs_pos = to_cuda_if_available(inputs, inputs_pos)
        if i < 2:
            LOG.debug("input shape: {}".format(inputs.shape))
        if semi_hard_input is not None or semi_hard_embed is not None:
            assert semi_hard_input is not None, "semi_hard_input and semi_hard_embed should be defined"
            assert semi_hard_embed is not None, "semi_hard_input and semi_hard_embed should be defined"
            model_triplet.eval()

            embed = get_embeddings_numpy(inputs, model_triplet)
            embed_pos = get_embeddings_numpy(inputs_pos, model_triplet)

            label_mask = (pred_labels.numpy() == -1).all(-1)
            semi_hard_mask = np.isnan(inputs_neg.detach().numpy()).reshape(inputs_neg.shape[0], -1).all(-1)
            mask = label_mask & semi_hard_mask

            if i < 2:
                LOG.debug("mask: {}".format(mask))
            negative_indexes = compute_semi_hard_indexes(embed[mask], embed_pos[mask], semi_hard_embed)
            inputs_neg[np.where(mask)] = semi_hard_input[negative_indexes]
        inputs_neg = to_cuda_if_available(inputs_neg)

        model_triplet.eval()
        with torch.no_grad():
            outputs_pos = model_triplet(inputs_pos)
            outputs_neg = model_triplet(inputs_neg)

        model_triplet.train()
        # forward + backward + optimize
        outputs = model_triplet(inputs)

        out.append(outputs)
        out_pos.append(outputs_pos)
        out_neg.append(outputs_neg)

    outputs = torch.stack(out, 0)
    outputs_pos = torch.stack(out_pos, 0)
    outputs_neg = torch.stack(out_neg, 0)
    return outputs, outputs_pos, outputs_neg
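compute_semi_hard_indexes and get_embeddings_numpy are project helpers not shown here. A plausible NumPy sketch of the semi-hard selection step (assuming flattened embedding vectors), for illustration only:

import numpy as np

def compute_semi_hard_indexes(embed, embed_pos, candidates):
    """For each (anchor, positive) pair, pick a negative among `candidates`
    with d(a, n) > d(a, p) and the smallest such distance; fall back to the
    closest candidate when none qualifies."""
    indexes = []
    for a, p in zip(embed, embed_pos):
        d_ap = np.linalg.norm(a - p)
        d_an = np.linalg.norm(candidates - a, axis=1)
        semi_hard = np.where(d_an > d_ap)[0]
        if len(semi_hard) > 0:
            indexes.append(int(semi_hard[np.argmin(d_an[semi_hard])]))
        else:
            indexes.append(int(np.argmin(d_an)))
    return indexes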
Example #5
    def get_sample(self, index):
        """From an index, get the features and the labels to create a sample

        Args:
            index: int, Index of the sample desired

        Returns:
            tuple
            Tuple containing the features and the labels (numpy.array, numpy.array)

        """
        features = self.get_feature_file_func(self.filenames.iloc[index])
        # print("filenames:{}".format(self.filenames.iloc[index]))
        # event_labels means weak labels, event_label means strong labels
        if "event_labels" in self.df.columns or {
                "onset", "offset", "event_label"
        }.issubset(self.df.columns):
            if "event_labels" in self.df.columns:
                label = self.df.iloc[index]["event_labels"]
                if pd.isna(label):
                    label = []
                if isinstance(label, str):
                    if label == "":
                        label = []
                    else:
                        label = label.split(",")
            else:
                cols = ["onset", "offset", "event_label"]
                label = self.df[self.df.filename ==
                                self.filenames.iloc[index]][cols]
                if label.empty:
                    label = []
        else:
            label = "empty"  # trick to have -1 for unlabeled data and concat them with labeled
            if "filename" not in self.df.columns:
                raise NotImplementedError(
                    "Dataframe to be encoded doesn't have the expected columns. Allowed: 'filename' for unlabeled; "
                    "'filename', 'event_labels' for weak labels; 'filename', 'onset', 'offset', 'event_label' "
                    "for strong labels. Yours: {}".format(self.df.columns))
        if index == 0:
            LOG.debug("label to encode: {}".format(label))
        if self.encode_function is not None:
            # labels are a list of string or list of list [[label, onset, offset]]
            y = self.encode_function(label)
        else:
            y = label
        sample = features, y
        return sample
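The encode_function passed to get_sample typically turns a list of label strings into a fixed-length many-hot vector. A minimal sketch under that assumption, reproducing the "empty" -> -1 trick mentioned in the comment above (the class list is hypothetical):

import numpy as np

def encode_weak(labels, classes=("Speech", "Dog", "Cat")):
    """List of class names -> many-hot vector. Unlabeled data ("empty" in
    get_sample above) becomes a vector of -1 so it can be concatenated
    with labeled samples."""
    if labels == "empty":
        return -np.ones(len(classes))
    y = np.zeros(len(classes))
    for label in labels:
        y[classes.index(label)] = 1
    return y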
Example #6
    def extract_features_from_meta(self,
                                   csv_audio,
                                   feature_dir,
                                   subpart_data=None):
        """Extract log mel spectrogram features.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the path to the directory where the features are
            subpart_data: int, number of files to extract features from the csv.
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        for ind, wav_name in enumerate(df_meta.filename.unique()):
            if ind % 500 == 0:
                LOG.debug(ind)
            wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
            wav_path = os.path.join(wav_dir, wav_name)

            out_filename = os.path.join(feature_dir,
                                        name_only(wav_name) + ".npy")

            if not os.path.exists(out_filename):
                if not os.path.isfile(wav_path):
                    LOG.error(
                        "File %s is in the csv but the audio file is missing; dropping it."
                        % wav_path)
                    df_meta = df_meta.drop(
                        df_meta[df_meta.filename == wav_name].index)
                else:
                    (audio, _) = read_audio(wav_path, cfg.sample_rate)
                    if audio.shape[0] == 0:
                        print("File %s is corrupted!" % wav_path)
                    else:
                        mel_spec = self.calculate_mel_spec(
                            audio, log_feature=self.save_log_feature)

                        np.save(out_filename, mel_spec)

                    LOG.debug("compute features time: %s" % (time.time() - t1))

        return df_meta.reset_index(drop=True)
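calculate_mel_spec is assumed to be defined on the same class. A hedged librosa-based sketch of what a log-mel extractor consistent with the snippet could look like (the n_fft/n_mels values are placeholders, not the project's config):

import librosa
import numpy as np

def calculate_mel_spec(audio, sr=16000, n_fft=2048, hop_length=512,
                       n_mels=64, log_feature=True):
    """Mel spectrogram shaped (frames, n_mels), optionally log-compressed,
    matching the .npy layout saved above."""
    mel = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=n_fft,
                                         hop_length=hop_length, n_mels=n_mels).T
    if log_feature:
        mel = np.log(mel + 1e-8)  # small offset to avoid log(0)
    return mel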
Example #7
    def __init__(self, serie_labels, classes, n_per_class, n_classes=None):
        super(CategoriesSampler, self).__init__(serie_labels)
        self.n_per_class = n_per_class
        self.classes = classes
        # self.n_batch = int(len(serie_labels) // (n_per_class * len(classes)))
        self.n_batch = int(serie_labels.value_counts().min() / n_per_class)
        self.serie_labels = serie_labels
        self.n_classes = n_classes
        LOG.debug(
            f"sampler has: {self.n_batch} batches of {n_per_class} samples per class, "
            f"len serie: {len(serie_labels)}")

        self.ind_per_class = []
        for label in classes:
            ind = np.argwhere(
                serie_labels.str.contains(label)).reshape(-1).tolist()
            if len(ind) > 0:
                self.ind_per_class.append(ind)
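Only __init__ is shown. A sketch of the __len__/__iter__ pair such a batch sampler plausibly implements (drawing n_per_class random indexes from each class per batch); this is an assumption, not the project's actual code:

    # Assumed companion methods (illustrative sketch):
    def __len__(self):
        return self.n_batch

    def __iter__(self):
        for _ in range(self.n_batch):
            batch = []
            class_inds = self.ind_per_class
            if self.n_classes is not None:  # optionally subsample the classes
                chosen = np.random.permutation(len(class_inds))[:self.n_classes]
                class_inds = [class_inds[c] for c in chosen]
            for ind in class_inds:
                picked = np.random.permutation(len(ind))[:self.n_per_class]
                batch.extend(ind[p] for p in picked)
            yield batch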
Example #8
    def train_loop(train_load, model):
        losses_bce = []
        if args.segment:
            for cnt, indexes in enumerate(train_load.batch_sampler):
                optimizer.zero_grad()
                for j, ind in enumerate(indexes):
                    inputs, pred_labels = train_set[ind]
                    if cnt == 0 and epoch_ == 0:
                        LOG.debug("classif input shape: {}".format(
                            inputs.shape))

                    inputs, pred_labels = to_cuda_if_available(
                        inputs, pred_labels)

                    # forward + backward + optimize
                    weak_out = model(inputs)
                    loss_bce = criterion_bce(
                        weak_out, pred_labels.argmax(0, keepdim=True))
                    loss_bce.backward()
                    losses_bce.append(loss_bce.item())
                optimizer.step()
        else:
            for cnt, samples in enumerate(train_load):
                optimizer.zero_grad()
                inputs, pred_labels = samples
                if cnt == 0 and epoch_ == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                inputs, pred_labels = to_cuda_if_available(inputs, pred_labels)

                # forward + backward + optimize
                weak_out = model(inputs)
                loss_bce = criterion_bce(weak_out, pred_labels)
                loss_bce.backward()
                losses_bce.append(loss_bce.item())
                optimizer.step()
        mean_loss_bce = np.mean(losses_bce)
        print('[%d / %d, %5d] loss: %.3f' %
              (epoch_ + 1, n_epochs, cnt + 1, mean_loss_bce))
        return mean_loss_bce, model
Example #9
def get_predictions(model, valid_dataset, decoder, save_predictions=None):
    for i, (input, _) in enumerate(valid_dataset):
        [input] = to_cuda_if_available([input])

        pred_strong, _ = model(input.unsqueeze(0))
        pred_strong = pred_strong.cpu()
        pred_strong = pred_strong.squeeze(0).detach().numpy()
        if i == 0:
            LOG.debug(pred_strong)
        pred_strong = ProbabilityEncoder().binarization(pred_strong, binarization_type="global_threshold",
                                                        threshold=0.5)
        pred_strong = scipy.ndimage.median_filter(pred_strong, (cfg.median_window, 1))
        pred = decoder(pred_strong)
        pred = pd.DataFrame(pred, columns=["event_label", "onset", "offset"])
        pred["filename"] = valid_dataset.filenames.iloc[i]
        if i == 0:
            LOG.debug("predictions: \n{}".format(pred))
            LOG.debug("predictions strong: \n{}".format(pred_strong))
            prediction_df = pred.copy()
        else:
            prediction_df = prediction_df.append(pred)

    if save_predictions is not None:
        LOG.info("Saving predictions at: {}".format(save_predictions))
        prediction_df.to_csv(save_predictions, index=False, sep="\t")
    return prediction_df
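ProbabilityEncoder().binarization with binarization_type="global_threshold" thresholds the frame-level probabilities before median filtering. A NumPy/SciPy equivalent of those two post-processing steps, as a sanity sketch (median_window mirrors cfg.median_window; the default is a placeholder):

import numpy as np
import scipy.ndimage

def binarize_and_smooth(pred_strong, threshold=0.5, median_window=33):
    """(frames, classes) probabilities -> binary decisions, then a median
    filter along time to remove spurious short activations."""
    binarized = (pred_strong > threshold).astype(int)
    return scipy.ndimage.median_filter(binarized, size=(median_window, 1))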
Example #10
    # ##############
    # Triplet dataset
    # #############
    batch_size = cfg.batch_size
    num_workers = cfg.num_workers

    list_trans_fr = [ApplyLog(), ToTensor(), Unsqueeze(0)]
    if args.segment:
        list_trans_fr.append(Unsqueeze(0))

    train_set = DataLoadDf(train_weak_df,
                           many_hot_encoder.encode_weak,
                           Compose(list_trans_fr),
                           return_indexes=False)
    LOG.debug("len train : {}".format(len(train_set)))
    # train_load = DataLoader(train_set, batch_size=batch_size, num_workers=num_workers, shuffle=True,
    #                         drop_last=True, collate_fn=default_collate)

    # scaler = Scaler()
    scaler = ScalerSum()
    scaler.calculate_scaler(train_set)
    LOG.debug(scaler.mean_)

    list_trans_fr.append(Normalize(scaler))
    train_set.set_transform(Compose(list_trans_fr))
    # Validation data
    valid_weak_df = dfs["valid"]
    if valid_weak_df is not None:
        valid_set = DataLoadDf(valid_weak_df,
                               many_hot_encoder.encode_weak,
Example #11
                           many_hot_encoder.encode_weak,
                           Compose(list_trans_fr),
                           return_indexes=False)
    if args.balance:
        train_sampler = CategoriesSampler(train_set.df.event_labels, classes,
                                          round(cfg.batch_size / len(classes)))
        train_load = DataLoader(train_set,
                                num_workers=num_workers,
                                batch_sampler=train_sampler)
    else:
        train_load = DataLoader(train_set,
                                num_workers=num_workers,
                                batch_size=batch_size,
                                shuffle=True)
        train_sampler = train_load.batch_sampler
    LOG.debug("len train : {}".format(len(train_set)))
    scaler = ScalerSum()
    scaler.calculate_scaler(train_set)
    LOG.debug(scaler.mean_)

    list_trans_fr.append(Normalize(scaler))
    train_set.set_transform(Compose(list_trans_fr))
    # Validation data
    valid_weak_df = dfs["valid"]
    if valid_weak_df is not None:
        valid_set = DataLoadDf(valid_weak_df,
                               many_hot_encoder.encode_weak,
                               Compose(list_trans_fr),
                               return_indexes=False)
        if args.balance:
            val_sampler = CategoriesSampler(
Example #12
def train_classifier(train_loader, classif_model, optimizer_classif, many_hot_encoder=None,
                     valid_loader=None, state=None,
                     dir_model="model", result_path="res", recompute=True):
    if state is None:  # avoid sharing a mutable default argument across calls
        state = {}
    criterion_bce = nn.BCELoss()
    classif_model, criterion_bce = to_cuda_if_available(classif_model, criterion_bce)
    print(classif_model)

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")

    # scheduler = ReduceLROnPlateau(optimizer_classif, 'max', factor=0.1, patience=cfg.reduce_lr,
    #                               verbose=True)
    print(optimizer_classif)

    save_results = pd.DataFrame()

    create_folder(dir_model)
    if cfg.save_best:
        model_path_sup1 = os.path.join(dir_model, "best_model")
    else:
        model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    if not os.path.exists(model_path_sup1) or recompute:
        for epoch_ in range(cfg.n_epoch_classifier):
            print(classif_model.training)
            start = time.time()
            loss_mean_bce = []
            for i, samples in enumerate(train_loader):
                inputs, pred_labels = samples
                if i == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                optimizer_classif.zero_grad()
                inputs = to_cuda_if_available(inputs)

                # forward + backward + optimize
                weak_out = classif_model(inputs)
                weak_out = to_cpu(weak_out)
                # print(output)
                loss_bce = criterion_bce(weak_out, pred_labels)
                loss_mean_bce.append(loss_bce.item())
                loss_bce.backward()
                optimizer_classif.step()

            loss_mean_bce = np.mean(loss_mean_bce)
            classif_model.eval()
            n_class = len(many_hot_encoder.labels)
            macro_f_measure_train = get_f_measure_by_class(classif_model, n_class,
                                                           train_loader)
            if valid_loader is not None:
                macro_f_measure = get_f_measure_by_class(classif_model, n_class,
                                                         valid_loader)
                mean_macro_f_measure = np.mean(macro_f_measure)
            else:
                macro_f_measure = np.array([-1.0])  # keep the results dict below well-defined
                mean_macro_f_measure = -1
            classif_model.train()
            print("Time to train an epoch: {}".format(time.time() - start))
            # print statistics
            print('[%d / %d, %5d] loss: %.3f' %
                  (epoch_ + 1, cfg.n_epoch_classifier, i + 1, loss_mean_bce))

            results = {"train_loss": loss_mean_bce,
                       "macro_measure_train": np.mean(macro_f_measure_train),
                       "class_macro_train": np.array_str(macro_f_measure_train, precision=2),
                       "macro_measure_valid": mean_macro_f_measure,
                       "class_macro_valid": np.array_str(macro_f_measure, precision=2),
                       }
            for key in results:
                LOG.info("\t\t ---->  {} : {}".format(key, results[key]))

            save_results = save_results.append(results, ignore_index=True)
            # scheduler.step(mean_macro_f_measure)

            # ##########
            # # Callbacks
            # ##########
            state['epoch'] = epoch_ + 1
            state["model"]["state_dict"] = classif_model.state_dict()
            state["optimizer"]["state_dict"] = optimizer_classif.state_dict()
            state["loss"] = loss_mean_bce
            state.update(results)

            if cfg.early_stopping is not None:
                if early_stopping_call.apply(mean_macro_f_measure):
                    print("EARLY STOPPING")
                    break

            if cfg.save_best and save_best_call.apply(mean_macro_f_measure):
                save_model(state, model_path_sup1)

        if cfg.save_best:
            LOG.info(
                "best model at epoch : {} with macro {}".format(save_best_call.best_epoch, save_best_call.best_val))
            LOG.info("loading model from: {}".format(model_path_sup1))
            classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
        else:
            model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
            save_model(state, model_path_sup1)
        LOG.debug("model path: {}".format(model_path_sup1))
        LOG.debug('Finished Training')
    else:
        classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
    LOG.info("#### End classif")
    save_results.to_csv(result_path, sep="\t", header=True, index=False)

    return classif_model, state
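SaveBest and EarlyStopping are project callbacks used above but not shown. A minimal sketch of what SaveBest with val_comp="sup" (higher is better) might look like, assuming this interface:

class SaveBest:
    """Track the best validation value; apply() returns True when it improves.
    val_comp='sup' means higher is better, 'inf' means lower is better."""

    def __init__(self, val_comp="sup"):
        self.better = (lambda a, b: a > b) if val_comp == "sup" else (lambda a, b: a < b)
        self.best_val = None
        self.best_epoch = 0
        self.current_epoch = 0

    def apply(self, value):
        self.current_epoch += 1
        if self.best_val is None or self.better(value, self.best_val):
            self.best_val = value
            self.best_epoch = self.current_epoch
            return True
        return False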
Example #13
    def extract_features_from_meta_frames(self,
                                          csv_audio,
                                          feature_dir,
                                          frames_in_sec,
                                          subpart_data=None):
        """Extract log mel spectrogram features.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the directory where the features are or will be created
            frames_in_sec: float, duration in seconds of one subsegment.
            subpart_data: int, number of files to extract features from the csv.
        """
        frames = int(frames_in_sec * cfg.sample_rate / cfg.hop_length)
        t1 = time.time()
        df_meta = pd.read_csv(csv_audio, header=0, sep="\t")
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        # Csv to store the features
        ext_name = "_" + str(frames)
        if subpart_data is not None and subpart_data < len(
                df_meta.filename.unique()):
            ext_name += "_sub" + str(subpart_data)
            df_meta = self.get_subpart_data(df_meta, subpart_data)

        self.get_classes(df_meta)

        meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
        csv_features = os.path.join(self.metadata_dir,
                                    meta_base + ext_name + meta_ext)

        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
        df_features = pd.DataFrame()

        path_exists = os.path.exists(csv_features)

        if not path_exists:
            LOG.debug("Creating new feature df")

            # Loop in all the filenames
            cnt_new_features = 0
            for ind, wav_name in enumerate(df_meta.filename.unique()):
                wav_path = os.path.join(wav_dir, wav_name)
                if not os.path.isfile(wav_path):
                    LOG.error(
                        "File %s is in the csv but the audio file is missing; dropping it."
                        % wav_path)
                    df_meta = df_meta.drop(
                        df_meta[df_meta.filename == wav_name].index)
                else:
                    try:
                        audio_len_sec = soundfile.info(wav_path).duration
                    except Exception as e:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                        print(e)
                        continue
                    if audio_len_sec == 0:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                    else:
                        files_exist = True
                        # How many features can we compute from this file?
                        cnt_max = min(
                            int(audio_len_sec // frames_in_sec),
                            int(cfg.max_len_seconds // frames_in_sec))
                        if cnt_max == 0:
                            cnt_max = 1

                        base_wav_name = os.path.join(feature_dir,
                                                     name_only(wav_name))
                        # Check if files already exist
                        out_filenames = [
                            base_wav_name + "fr" + str(frames) + "_" +
                            str(cnt * frames) + "-" + str(
                                (cnt + 1) * frames) + ".npy"
                            for cnt in range(cnt_max)
                        ]
                        for fname in out_filenames:
                            if not os.path.exists(fname):
                                files_exist = False
                                break

                        if not files_exist:
                            if cnt_new_features % 500 == 0:
                                LOG.debug(f"new features, {cnt_new_features}")
                            cnt_new_features += 1
                            audio, cnt_max = self.get_features(
                                wav_path, feature_dir, frames)
                            out_filenames = [
                                base_wav_name + "fr" + str(frames) + "_" +
                                str(cnt * frames) + "-" + str(
                                    (cnt + 1) * frames) + ".npy"
                                for cnt in range(cnt_max)
                            ]

                        # features label to add to the dataframe
                        add_item = self.get_labels(ind, df_meta, wav_name,
                                                   frames, out_filenames)

                        df_features = df_features.append(
                            pd.DataFrame(add_item), ignore_index=True)

            LOG.info(csv_features)
            df_features.to_csv(csv_features,
                               sep="\t",
                               header=True,
                               index=False)
            df_features = pd.read_csv(
                csv_features,
                sep="\t")  # Otherwise event_labels is "" and not NaN
        else:
            df_features = self.get_df_from_meta(
                csv_features)  # No subpart data because should be in the name

        LOG.debug("compute features time: %s" % (time.time() - t1))
        return df_features
Example #14
def train(train_loader,
          model,
          optimizer,
          epoch,
          ema_model=None,
          weak_mask=None,
          strong_mask=None):
    """ One epoch of a Mean Teacher model
    :param train_loader: torch.utils.data.DataLoader, iterator of training batches for an epoch.
    Should return 3 values: teacher input, student input, labels
    :param model: torch.Module, model to be trained, should return a weak and strong prediction
    :param optimizer: torch.Module, optimizer used to train the model
    :param epoch: int, the current epoch of training
    :param ema_model: torch.Module, teacher model (exponential moving average of the student), should return a weak and strong prediction
    :param weak_mask: mask the batch to get only the weak labeled data (used to calculate the loss)
    :param strong_mask: mask the batch to get only the strong labeled data (used to calculate the loss)
    """
    class_criterion = nn.BCELoss()

    ##################################################
    class_criterion1 = nn.BCELoss(reduction='none')
    ##################################################

    consistency_criterion = nn.MSELoss()

    # [class_criterion, consistency_criterion] = to_cuda_if_available(
    #     [class_criterion, consistency_criterion])
    [class_criterion, class_criterion1,
     consistency_criterion] = to_cuda_if_available(
         [class_criterion, class_criterion1, consistency_criterion])

    meters = AverageMeterSet()

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    rampup_length = len(train_loader) * cfg.n_epoch // 2

    print("Train\n")
    # LOG.info("Weak[k] -> Weak[k]")
    # LOG.info("Weak[k] -> strong[k]")

    # print(weak_mask.start)
    # print(strong_mask.start)
    # exit()
    count = 0
    check_cus_weak = 0
    difficulty_loss = 0
    loss_w = 1
    LOG.info("loss paramater:{}".format(loss_w))
    for i, (batch_input, ema_batch_input, target) in enumerate(train_loader):
        # print(batch_input.shape)
        # print(ema_batch_input.shape)
        # exit()
        global_step = epoch * len(train_loader) + i
        if global_step < rampup_length:
            rampup_value = ramps.sigmoid_rampup(global_step, rampup_length)
        else:
            rampup_value = 1.0

        # Todo check if this improves the performance
        # adjust_learning_rate(optimizer, rampup_value, rampdown_value)
        meters.update('lr', optimizer.param_groups[0]['lr'])

        [batch_input, ema_batch_input,
         target] = to_cuda_if_available([batch_input, ema_batch_input, target])
        LOG.debug("batch_input:{}".format(batch_input.mean()))

        # print(batch_input)
        # exit()

        # Outputs
        ##################################################
        # strong_pred_ema, weak_pred_ema = ema_model(ema_batch_input)
        strong_pred_ema, weak_pred_ema, sof_ema = ema_model(ema_batch_input)
        sof_ema = sof_ema.detach()
        ##################################################

        strong_pred_ema = strong_pred_ema.detach()
        weak_pred_ema = weak_pred_ema.detach()

        ##################################################
        # strong_pred, weak_pred = model(batch_input)
        strong_pred, weak_pred, sof = model(batch_input)
        ##################################################

        ##################################################
        # custom_ema_loss = Custom_BCE_Loss(ema_batch_input, class_criterion1)

        if difficulty_loss == 0:
            LOG.info("############### Deffine Difficulty Loss ###############")
            difficulty_loss = 1
        custom_ema_loss = Custom_BCE_Loss_difficulty(ema_batch_input,
                                                     class_criterion1,
                                                     paramater=loss_w)
        custom_ema_loss.initialize(strong_pred_ema, sof_ema)

        # custom_loss = Custom_BCE_Loss(batch_input, class_criterion1)
        custom_loss = Custom_BCE_Loss_difficulty(batch_input,
                                                 class_criterion1,
                                                 paramater=loss_w)
        custom_loss.initialize(strong_pred, sof)
        ##################################################

        # print(strong_pred.shape)
        # print(strong_pred)
        # print(weak_pred.shape)
        # print(weak_pred)
        # exit()

        loss = None
        # Weak BCE Loss
        # Take the max in the time axis
        # torch.set_printoptions(threshold=10000)
        # print(target[-10])
        # # print(target.max(-2))
        # # print(target.max(-2)[0])
        # print(target.max(-1)[0][-10])
        # exit()

        target_weak = target.max(-2)[0]
        if weak_mask is not None:
            weak_class_loss = class_criterion(weak_pred[weak_mask],
                                              target_weak[weak_mask])
            ema_class_loss = class_criterion(weak_pred_ema[weak_mask],
                                             target_weak[weak_mask])

            print(
                "normal_weak:",
                class_criterion(weak_pred[weak_mask], target_weak[weak_mask]))

            ##################################################
            custom_weak_class_loss = custom_loss.weak(target_weak, weak_mask)
            custom_ema_class_loss = custom_ema_loss.weak(
                target_weak, weak_mask)
            print("custom_weak:", custom_weak_class_loss)
            ##################################################

            count += 1
            check_cus_weak += custom_weak_class_loss
            # print(custom_weak_class_loss.item())

            if i == 0:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug("Target_weak mask: {}".format(
                    target_weak[weak_mask]))
                LOG.debug(custom_weak_class_loss)  ###
                LOG.debug("rampup_value: {}".format(rampup_value))
            meters.update('weak_class_loss',
                          custom_weak_class_loss.item())  ###
            meters.update('Weak EMA loss', custom_ema_class_loss.item())  ###

            # loss = weak_class_loss
            loss = custom_weak_class_loss

            ####################################################################################
            # weak_class_loss = class_criterion(strong_pred[weak_mask], target[weak_mask])
            # ema_class_loss = class_criterion(strong_pred_ema[weak_mask], target[weak_mask])
            # # if i == 0:
            # #     LOG.debug("target: {}".format(target.mean(-2)))
            # #     LOG.debug("Target_weak: {}".format(target))
            # #     LOG.debug("Target_weak mask: {}".format(target[weak_mask]))
            # #     LOG.debug(weak_class_loss)
            # #     LOG.debug("rampup_value: {}".format(rampup_value))
            # meters.update('weak_class_loss', weak_class_loss.item())
            # meters.update('Weak EMA loss', ema_class_loss.item())

            # loss = weak_class_loss
            ####################################################################################

        # Strong BCE loss
        if strong_mask is not None:
            strong_class_loss = class_criterion(strong_pred[strong_mask],
                                                target[strong_mask])
            # meters.update('Strong loss', strong_class_loss.item())

            strong_ema_class_loss = class_criterion(
                strong_pred_ema[strong_mask], target[strong_mask])
            # meters.update('Strong EMA loss', strong_ema_class_loss.item())

            print(
                "normal_strong:",
                class_criterion(strong_pred[strong_mask], target[strong_mask]))

            ##################################################
            custom_strong_class_loss = custom_loss.strong(target, strong_mask)
            meters.update('Strong loss', custom_strong_class_loss.item())

            custom_strong_ema_class_loss = custom_ema_loss.strong(
                target, strong_mask)
            meters.update('Strong EMA loss',
                          custom_strong_ema_class_loss.item())
            print("custom_strong:", custom_strong_class_loss)
            ##################################################

            if loss is not None:
                # loss += strong_class_loss
                loss += custom_strong_class_loss
            else:
                # loss = strong_class_loss
                loss = custom_strong_class_loss

        # print("check_weak:", class_criterion1(weak_pred[weak_mask], target_weak[weak_mask]).mean())
        # print("check_strong:", class_criterion1(strong_pred[strong_mask], target[strong_mask]).mean())
        # print("\n")

        # exit()

        # Teacher-student consistency cost
        if ema_model is not None:

            consistency_cost = cfg.max_consistency_cost * rampup_value
            meters.update('Consistency weight', consistency_cost)
            # Take consistency about strong predictions (all data)
            consistency_loss_strong = consistency_cost * consistency_criterion(
                strong_pred, strong_pred_ema)
            meters.update('Consistency strong', consistency_loss_strong.item())
            if loss is not None:
                loss += consistency_loss_strong
            else:
                loss = consistency_loss_strong

            meters.update('Consistency weight', consistency_cost)
            # Take consistency about weak predictions (all data)
            consistency_loss_weak = consistency_cost * consistency_criterion(
                weak_pred, weak_pred_ema)
            meters.update('Consistency weak', consistency_loss_weak.item())
            if loss is not None:
                loss += consistency_loss_weak
            else:
                loss = consistency_loss_weak

        assert not (np.isnan(loss.item())
                    or loss.item() > 1e5), 'Loss explosion: {}'.format(
                        loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))

    print("\ncheck_cus_weak:\n", check_cus_weak / count)
Example #15
    classes = DatasetDcase2019Task4.get_classes(
        [weak_df, validation_df, synthetic_df])

    # Be careful, frames is max_frames // pooling_time_ratio because max_pooling is applied on time axis in the model
    many_hot_encoder = ManyHotEncoder(classes,
                                      n_frames=cfg.max_frames //
                                      pooling_time_ratio)

    transforms = get_transforms(cfg.max_frames)

    # Divide weak in train and valid
    train_weak_df = weak_df.sample(frac=0.8, random_state=26)
    valid_weak_df = weak_df.drop(train_weak_df.index).reset_index(drop=True)
    train_weak_df = train_weak_df.reset_index(drop=True)
    LOG.debug(valid_weak_df.event_labels.value_counts())
    train_weak_data = DataLoadDf(train_weak_df,
                                 dataset.get_feature_file,
                                 many_hot_encoder.encode_strong_df,
                                 transform=transforms)

    # Divide synthetic in train and valid
    filenames_train = synthetic_df.filename.drop_duplicates().sample(
        frac=0.8, random_state=26)
    train_synth_df = synthetic_df[synthetic_df.filename.isin(filenames_train)]
    valid_synth_df = synthetic_df.drop(
        train_synth_df.index).reset_index(drop=True)

    # Put train_synth in frames so many_hot_encoder can work.
    # Not done for valid: labels are not used at prediction time and the event-based metric expects seconds.
    train_synth_df_frames = train_synth_df.copy()
Example #16
    def extract_features_from_meta_segment(self,
                                           csv_audio,
                                           feature_dir,
                                           subpart_data=None,
                                           fixed_segment=None):
        """Extract log mel spectrogram features, but the csv needs to be strongly labeled.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the path of the features directory.
            subpart_data: int, number of files to extract features from the csv.
            fixed_segment: float, in seconds, the size of the kept segment. If longer than the audio,
                the full audio length is kept. If longer than the labeled segment, the surrounding audio
                is included (which allows creating weak labels).
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        self.get_classes(df_meta)
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        ext_name = "_segment_"
        if subpart_data:
            ext_name += str(subpart_data)

        if fixed_segment is not None:
            LOG.debug(
                f" durations before: "
                f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
            )
            ext_name += f"fix{fixed_segment}"
            df_meta = self.trunc_pad_segment(df_meta, fixed_segment)
            LOG.debug(
                f" durations after: "
                f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
            )

        meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
        csv_features = os.path.join(self.metadata_dir,
                                    meta_base + ext_name + meta_ext)

        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
        df_features = pd.DataFrame()

        path_exists = os.path.exists(csv_features)

        if not path_exists:
            # Loop in all the filenames
            for ind, wav_name in enumerate(df_meta.filename.unique()):
                if ind % 500 == 0:
                    LOG.debug(ind)

                wav_path = os.path.join(wav_dir, wav_name)
                if not os.path.isfile(wav_path):
                    LOG.error(
                        "File %s is in the csv but the audio file is missing; dropping it."
                        % wav_path)
                    df_meta = df_meta.drop(
                        df_meta[df_meta.filename == wav_name].index)
                else:
                    try:
                        audio_len_sec = soundfile.info(wav_path).duration
                    except Exception as e:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                        print(e)
                        continue
                    if audio_len_sec == 0:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                    else:
                        files_exist = True
                        # How many features can we compute from this file?
                        sub_df = df_meta[df_meta.filename == wav_name]
                        cnt_max = len(sub_df)

                        if cnt_max == 0:
                            break

                        base_wav_name = name_only(wav_name)
                        ext_featname = "_seg"
                        if fixed_segment:
                            ext_featname += f"fix{fixed_segment}"
                            files_exist = False  # We should always recompute because of the randomness of onset offset
                        # Check if files already exist
                        out_filenames = [
                            base_wav_name + ext_featname + str(cnt) + ".npy"
                            for cnt in range(cnt_max)
                        ]
                        for fname in out_filenames:
                            fpath = os.path.join(feature_dir, fname)
                            if not os.path.exists(fpath):
                                files_exist = False
                                break

                        add_item = {
                            "raw_filename": [],
                            "filename": [],
                            "event_labels": []
                        }
                        for ii, (i, row) in enumerate(sub_df.iterrows()):
                            if not pd.isna(row.event_label):
                                if ii > 0:
                                    extnb = str(ii)
                                else:
                                    extnb = ""
                                out_filename = os.path.join(
                                    feature_dir, name_only(wav_name))
                                out_filename += ext_featname + extnb + ".npy"
                                if not files_exist:
                                    sr = soundfile.info(wav_path).samplerate
                                    (audio,
                                     _) = read_audio(wav_path,
                                                     cfg.sample_rate,
                                                     start=int(row.onset * sr),
                                                     stop=int(row.offset * sr))
                                    mel_spec = self.calculate_mel_spec(
                                        audio,
                                        log_feature=self.save_log_feature)
                                    if fixed_segment:
                                        pad_trunc_length = int(
                                            fixed_segment * cfg.sample_rate //
                                            cfg.hop_length)
                                        mel_spec = pad_trunc_seq(
                                            mel_spec, pad_trunc_length)
                                    np.save(out_filename, mel_spec)

                                add_item["raw_filename"].append(wav_name)
                                add_item["filename"].append(out_filename)
                                add_item["event_labels"].append(
                                    row["event_label"])

                        df_features = df_features.append(
                            pd.DataFrame(add_item), ignore_index=True)

            df_features.to_csv(csv_features,
                               sep="\t",
                               header=True,
                               index=False)
            df_features = pd.read_csv(
                csv_features,
                sep="\t")  # Otherwise event_labels is "" and not NaN
        else:
            df_features = self.get_df_from_meta(
                csv_features)  # No subpart data because should be in the name

        LOG.debug("compute features time: %s" % (time.time() - t1))
        return df_features
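pad_trunc_seq is used above to force a fixed number of frames. A hedged NumPy sketch of the usual behavior (zero-pad short sequences, truncate long ones along the time axis):

import numpy as np

def pad_trunc_seq(x, max_len):
    """Zero-pad or truncate a (frames, n_mels) array to exactly max_len frames."""
    length = x.shape[0]
    if length < max_len:
        pad = np.zeros((max_len - length,) + x.shape[1:], dtype=x.dtype)
        return np.concatenate((x, pad), axis=0)
    return x[:max_len]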
Example #17
    def extract_features_from_meta(self,
                                   csv_audio,
                                   subpart_data=None,
                                   training=False):
        """Extract log mel spectrogram features.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            subpart_data: int, number of files to extract features from the csv.
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        df_all = list()
        feature_fns = list()
        LOG.info('Extracting/loading features')
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        augmentation_funcs = [
            ('orig', None),  # original signal
        ]

        if training:
            augmentation_funcs += [
                # ('lpf4k', partial(lpf, wc=4000, fs=cfg.sample_rate)),
                # ('lpf8k', partial(lpf, wc=8000, fs=cfg.sample_rate)),
                # ('lpf16k', partial(lpf, wc=16000, fs=cfg.sample_rate)),
                # ('ps-6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-6)),
                # ('ps-3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-3)),
                # ('ps+3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=3)),
                # ('ps+6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=6)),
                # ('ts1.25', partial(time_stretch, rate=1.25)),
                # ('ts1.5', partial(time_stretch, rate=1.5)),
                # ('amp0.5', partial(amplitude_scale, coeff=0.5)),
                # ('amp0.75', partial(amplitude_scale, coeff=0.75)),
                # ('hp0.25', partial(hp_reweight, lam=0.25)),
                # ('hp0.75', partial(hp_reweight, lam=0.75))
            ]

        wav_fns = df_meta.filename.unique()
    flag = True  # log the length of multi-entry meta once
        for ind, wav_name in tqdm(enumerate(wav_fns), total=len(wav_fns)):
            if ind % 500 == 0:
                LOG.debug(ind)

            # verify the audio file is present
            wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
            wav_path = os.path.join(wav_dir, wav_name)
            if os.path.isfile(wav_path):
                # defer loading audio until the need for feature extraction is verified
                audio = None

                # perform all augmentations (including no augmentation)
                for name, func in augmentation_funcs:
                    if name == 'orig':
                        out_filename = os.path.splitext(wav_name)[0] + ".npy"
                    else:
                        out_filename = os.path.splitext(
                            wav_name)[0] + '_' + name + ".npy"
                    out_path = os.path.join(self.feature_dir, out_filename)

                    # add the metadata
                    meta = df_meta.loc[df_meta.filename == wav_name]
                    df_all.append(meta)

                    # for synthetic data with time annotation of events, the meta df will have several entries for
                    # each wav file. therefore, we need to append the feature filename len(meta) times.
                    if len(meta) > 1:
                        feature_fns += [out_filename] * len(meta)
                        if flag:
                            print('Length of meta: {}'.format(len(meta)))
                            flag = False
                    else:
                        feature_fns.append(out_filename)

                    if not os.path.exists(out_path):
                        if audio is None:
                            (audio, _) = read_audio(wav_path, cfg.sample_rate)
                            if audio.shape[0] == 0:
                                print("File %s is corrupted!" % wav_path)
                                del feature_fns[-1]
                                del df_all[-1]
                                continue  # skip feature extraction for the corrupted file

                        # perform any augmentation, extract features, save features
                        # LOG.info('extracting {}'.format(out_filename))
                        if func is not None:
                            mel_spec = self.calculate_mel_spec(func(audio))
                        else:
                            mel_spec = self.calculate_mel_spec(audio)
                        np.save(out_path, mel_spec)

                        LOG.debug("compute features time: %s" %
                                  (time.time() - t1))
            else:
                LOG.error(
                    "File %s is in the csv but the audio file is missing!"
                    % wav_path)
                # df_meta = df_meta.drop(df_meta[df_meta.filename == wav_name].index)

        # form the final DataFrame of meta data for features from original and augmented audio
        df_all = pd.concat(df_all).reset_index(drop=True)
        df_all['feature_filename'] = feature_fns

        return df_all
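The commented-out augmentation list pairs a tag with a functools.partial over a small helper. A sketch of two of the simpler helpers (amplitude_scale exactly as named above; lpf as a plausible Butterworth low-pass, an assumption about its implementation):

from functools import partial

from scipy.signal import butter, lfilter

def amplitude_scale(audio, coeff):
    """Scale the waveform amplitude by a constant factor."""
    return coeff * audio

def lpf(audio, wc, fs, order=4):
    """Butterworth low-pass at cutoff wc Hz for a signal sampled at fs Hz."""
    b, a = butter(order, wc / (fs / 2), btype="low")
    return lfilter(b, a, audio)

augmentation_funcs = [
    ("orig", None),
    ("amp0.5", partial(amplitude_scale, coeff=0.5)),
    ("lpf4k", partial(lpf, wc=4000, fs=16000)),
]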
Example #18
    train_weak_df_fr = dfs["train"]
    train_weak_dl_fr = DataLoadDf(train_weak_df_fr, encode_function_label, transform=Compose(trans_fr))

    if type_positive != "label" or type_negative != "label":
        unlabel_df_fr = dataset.get_df_feat_dir(cfg.unlabel, subpart_data=subpart_data, frames_in_sec=frames_in_sec)
        unlabel_dl_fr = DataLoadDf(unlabel_df_fr, encode_function_label, transform=Compose(trans_fr))
        datasets_mean = [train_weak_dl_fr, unlabel_dl_fr]
    else:
        datasets_mean = [train_weak_dl_fr]
    # Normalize
    if resume_training is None:
        scaler = ScalerSum()
        scaler.calculate_scaler(ConcatDataset(datasets_mean))
    else:
        scaler = ScalerSum.load_state_dict(state["scaler"])
    LOG.debug(scaler.mean_)

    trans_fr_scale = trans_fr + [Normalize(scaler)]
    if segment:
        trans_fr_scale.append(Unsqueeze(0))

    for dl in datasets_mean:
        dl.set_transform(Compose(trans_fr_scale))
    print(dl.transform)
    concat_frames = ConcatDataset(datasets_mean)

    trans_fr_sc_embed = deepcopy(trans_fr_scale)
    if not segment:
        trans_fr_sc_embed.append(Unsqueeze(0))

    train_weak_embed = DataLoadDf(train_weak_df_fr, encode_function_label,
Example #19
def train(cfg,
          train_loader,
          model,
          optimizer,
          epoch,
          ema_model=None,
          weak_mask=None,
          strong_mask=None):
    """ One epoch of a Mean Teacher model
    :param train_loader: torch.utils.data.DataLoader, iterator of training batches for an epoch.
    Should return 3 values: teacher input, student input, labels
    :param model: torch.Module, model to be trained, should return a weak and strong prediction
    :param optimizer: torch.Module, optimizer used to train the model
    :param epoch: int, the current epoch of training
    :param ema_model: torch.Module, teacher model (exponential moving average of the student), should return a weak and strong prediction
    :param weak_mask: mask the batch to get only the weak labeled data (used to calculate the loss)
    :param strong_mask: mask the batch to get only the strong labeled data (used to calculate the loss)
    """
    class_criterion = nn.BCELoss()
    consistency_criterion_strong = nn.MSELoss()
    lds_criterion = LDSLoss(xi=cfg.vat_xi,
                            eps=cfg.vat_eps,
                            n_power_iter=cfg.vat_n_power_iter)
    [class_criterion, consistency_criterion_strong,
     lds_criterion] = to_cuda_if_available(
         [class_criterion, consistency_criterion_strong, lds_criterion])

    meters = AverageMeterSet()

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    rampup_length = len(train_loader) * cfg.n_epoch // 2
    for i, (batch_input, ema_batch_input, target) in enumerate(train_loader):
        global_step = epoch * len(train_loader) + i
        if global_step < rampup_length:
            rampup_value = ramps.sigmoid_rampup(global_step, rampup_length)
        else:
            rampup_value = 1.0

        # Todo check if this improves the performance
        # adjust_learning_rate(optimizer, rampup_value, rampdown_value)
        meters.update('lr', optimizer.param_groups[0]['lr'])

        [batch_input, ema_batch_input,
         target] = to_cuda_if_available([batch_input, ema_batch_input, target])
        LOG.debug(batch_input.mean())
        # Outputs
        strong_pred_ema, weak_pred_ema = ema_model(ema_batch_input)
        strong_pred_ema = strong_pred_ema.detach()
        weak_pred_ema = weak_pred_ema.detach()

        strong_pred, weak_pred = model(batch_input)
        loss = None
        # Weak BCE Loss
        # Take the max in axis 2 (assumed to be time)
        if len(target.shape) > 2:
            target_weak = target.max(-2)[0]
        else:
            target_weak = target

        if weak_mask is not None:
            weak_class_loss = class_criterion(weak_pred[weak_mask],
                                              target_weak[weak_mask])
            ema_class_loss = class_criterion(weak_pred_ema[weak_mask],
                                             target_weak[weak_mask])

            if i == 0:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug("Target_weak mask: {}".format(
                    target_weak[weak_mask]))
                LOG.debug(weak_class_loss)
                LOG.debug("rampup_value: {}".format(rampup_value))
            meters.update('weak_class_loss', weak_class_loss.item())

            loss = weak_class_loss

        # Strong BCE loss
        if strong_mask is not None:
            strong_class_loss = class_criterion(strong_pred[strong_mask],
                                                target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())

            if ema_model is not None:
                strong_ema_class_loss = class_criterion(
                    strong_pred_ema[strong_mask], target[strong_mask])
                meters.update('Strong EMA loss', strong_ema_class_loss.item())
            if loss is not None:
                loss += strong_class_loss
            else:
                loss = strong_class_loss

        # Teacher-student consistency cost
        if ema_model is not None:
            consistency_cost = cfg.max_consistency_cost * rampup_value
            meters.update('Consistency weight', consistency_cost)

            # Consistency is computed on the whole batch
            # (weakly labeled, strongly labeled and unlabeled data)
            consistency_loss_strong = consistency_cost * consistency_criterion_strong(
                strong_pred, strong_pred_ema)
            meters.update('Consistency strong', consistency_loss_strong.item())
            if loss is not None:
                loss += consistency_loss_strong
            else:
                loss = consistency_loss_strong

            consistency_loss_weak = consistency_cost * consistency_criterion_strong(
                weak_pred, weak_pred_ema)
            meters.update('Consistency weak', consistency_loss_weak.item())
            if loss is not None:
                loss += consistency_loss_weak
            else:
                loss = consistency_loss_weak

        # LDS (virtual adversarial training) loss
        if cfg.vat_enabled:
            lds_loss = cfg.vat_coeff * lds_criterion(model, batch_input,
                                                     weak_pred)
            # lds_loss already includes vat_coeff, so log it as is
            LOG.info('loss: {:.3f}, lds loss: {:.3f}'.format(
                loss.item(), lds_loss.item()))
            loss += lds_loss
        else:
            if i % 25 == 0:
                LOG.info('loss: {:.3f}'.format(loss.item()))

        assert not (np.isnan(loss.item())
                    or loss.item() > 1e5), 'Loss explosion: {}'.format(
                        loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))
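
# The snippet above references two helpers that are not shown here:
# ramps.sigmoid_rampup and update_ema_variables. Below is a minimal sketch,
# assuming the standard Mean Teacher reference implementation; the exact
# definitions in this codebase may differ.
import numpy as np


def sigmoid_rampup(current, rampup_length):
    """Sigmoid-shaped rampup from 0 to 1 over `rampup_length` steps."""
    if rampup_length == 0:
        return 1.0
    current = np.clip(current, 0.0, rampup_length)
    phase = 1.0 - current / rampup_length
    return float(np.exp(-5.0 * phase * phase))


def update_ema_variables(model, ema_model, alpha, global_step):
    """Update the teacher as an exponential moving average of the student."""
    # Use a smaller decay early on so the teacher tracks the student quickly,
    # then converge towards the target decay `alpha` (0.999 in the call above).
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)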
Example #20
def download_file(result_dir, filename):
    """ download a file from youtube given an audioSet filename. (It takes only a part of the file thanks to
    information provided in the filename)

    Parameters
    ----------

    result_dir : str, result directory which will contain the downloaded file

    filename : str, AudioSet filename to download

    Return
    ------

    list : list, Empty list if the file is downloaded, otherwise contains the filename and the error associated

    """
    LOG.debug(filename)
    tmp_filename = ""
    query_id = filename[1:12]
    segment_start = filename[13:-4].split('_')[0]
    segment_end = filename[13:-4].split('_')[1]
    audio_container = AudioContainer()

    # Define download parameters
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': TMP_FOLDER+'%(id)s.%(ext)s',
        'noplaylist': True,
        'quiet': True,
        'prefer_ffmpeg': True,
        'logger': MyLogger(),
        'audioformat': 'wav'
    }

    try:
        # Download file
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            meta = ydl.extract_info(
                'https://www.youtube.com/watch?v={query_id}'.format(query_id=query_id), download=True)

        audio_formats = [f for f in meta["formats"] if f.get('vcodec') == 'none']

        if not audio_formats:
            return [filename, "no audio format available"]

        # Get the best audio format (youtube-dl sorts formats from worst to best)
        best_audio_format = audio_formats[-1]

        tmp_filename = TMP_FOLDER + query_id + "." + best_audio_format["ext"]

        audio_container.load(filename=tmp_filename, fs=44100, res_type='kaiser_best',
                             start=float(segment_start), stop=float(segment_end))

        # Save segmented audio
        audio_container.filename = filename
        audio_container.detect_file_format()
        audio_container.save(filename=os.path.join(result_dir, filename))

        # Remove the temporary file
        os.remove(tmp_filename)
        return []

    except (KeyboardInterrupt, SystemExit):
        # Remove temporary files and current audio file.
        for fpath in glob.glob(TMP_FOLDER + query_id + "*"):
            os.remove(fpath)
        raise

    # youtube-dl error, file often removed
    except (ExtractorError, DownloadError, OSError) as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)

        return [filename, str(e)]

    # multiprocessing can give this error
    except IndexError as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        LOG.info(filename)
        LOG.info(str(e))
        return [filename, "Index Error"]
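
# Note on the string slicing above: it assumes the AudioSet filename
# convention "Y<video_id>_<start>_<end>.<ext>" with an 11-character YouTube
# id. A hypothetical example of the assumed layout:
fname = "Y-0DdlOuIFUI_30.000_40.000.wav"  # hypothetical file name
video_id = fname[1:12]                    # "-0DdlOuIFUI" (11-char YouTube id)
start, end = fname[13:-4].split("_")      # "30.000", "40.000"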
Example #21
def get_dfs(dataset,
            weak_path,
            test_path,
            eval_path=None,
            subpart_data=None,
            valid_list=None,
            frames_in_sec=None,
            segment=False,
            dropna=True,
            unique_fr=False,
            fixed_segment=False):
    weak_df_fr = dataset.get_df_feat_dir(weak_path,
                                         subpart_data=subpart_data,
                                         segment=segment,
                                         frames_in_sec=frames_in_sec,
                                         fixed_segment=fixed_segment)

    if unique_fr:
        if segment:
            raise NotImplementedError("cannot use unique fr with segment")

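        # Pick one random frame per clip, avoiding the first and last frames
        # when the clip has more than two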
        def take_mid_fr(x):
            if len(x) > 2:
                x = x.iloc[1:-1]
            return x.sample(n=1)

        l_keep = weak_df_fr.groupby("raw_filename").apply(
            take_mid_fr).filename.tolist()
        weak_df_fr = weak_df_fr[weak_df_fr.filename.isin(l_keep)].reset_index(
            drop=True)

    if dropna:
        weak_df_fr = weak_df_fr.dropna().reset_index(drop=True)
        print("DROP NANS")
    if valid_list is None:
        valid_list = []
    valid_weak_df_fr = weak_df_fr[weak_df_fr.raw_filename.isin(valid_list)]
    train_weak_df_fr = weak_df_fr.drop(
        valid_weak_df_fr.index).reset_index(drop=True)
    valid_weak_df_fr = valid_weak_df_fr.reset_index(drop=True)
    valid_weak_df_fr = valid_weak_df_fr.dropna().reset_index(drop=True)
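    # Keep only single-label clips in the validation set (drop multi-label rows)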
    valid_weak_df_fr = valid_weak_df_fr[~valid_weak_df_fr.event_labels.
                                        fillna("").str.contains(",")]
    valid_weak_df_fr = valid_weak_df_fr.reset_index(drop=True)

    LOG.debug("len weak df frames : {}".format(len(weak_df_fr)))
    LOG.debug("len train weak df frames : {}".format(len(train_weak_df_fr)))
    LOG.debug("len valid weak df frames : {}".format(len(valid_weak_df_fr)))

    # Todo, remove hard coded stuff
    test_df_fr = dataset.get_df_feat_dir(test_path,
                                         subpart_data=subpart_data,
                                         segment=segment,
                                         frames_in_sec=frames_in_sec,
                                         fixed_segment=0.2)
    test_df_fr = test_df_fr.dropna().reset_index(drop=True)
    test_df_1 = dataset.get_df_feat_dir(test_path,
                                        subpart_data=subpart_data,
                                        segment=segment,
                                        frames_in_sec=frames_in_sec,
                                        fixed_segment=1)
    test_df_1 = test_df_1.dropna().reset_index(drop=True)
    test_df_10 = dataset.get_df_feat_dir(test_path,
                                         subpart_data=subpart_data,
                                         segment=segment,
                                         frames_in_sec=frames_in_sec,
                                         fixed_segment=10)
    test_df_10 = test_df_10.dropna().reset_index(drop=True)

    print("drop test nans")
    if eval_path is not None:
        eval_df_fr = dataset.get_df_feat_dir(eval_path,
                                             subpart_data=subpart_data,
                                             segment=segment,
                                             frames_in_sec=frames_in_sec,
                                             fixed_segment=fixed_segment)
    else:
        eval_df_fr = None

    dfs = {
        "train": train_weak_df_fr,
        "valid": valid_weak_df_fr,
        "test": test_df_fr,
        "test1": test_df_1,
        "test10": test_df_10,
        "eval": eval_df_fr
    }
    return dfs
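
# Hypothetical usage sketch: the paths and the validation list below are
# placeholders, and `dataset` is assumed to expose get_df_feat_dir as above.
dfs = get_dfs(dataset,
              weak_path="features/train/weak",        # assumed path
              test_path="features/validation",        # assumed path
              subpart_data=500,
              valid_list=["file1.wav", "file2.wav"],  # assumed held-out files
              frames_in_sec=0.2)
train_df, valid_df = dfs["train"], dfs["valid"]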