Example #1
    def model_search(self):
        """Do a model search with binary search.

        Returns
        -------
        results : pandas.DataFrame
        selected_model : dict or pandas.Series
            Contains the keys:
                model_file
                model_iteration
                mean_loss
                mean_acc
                f1_weighted
        """
        results = {}
        # Don't allow the zero index; We should never select an
        #  untrained model!
        index = 1 if len(self.param_list) > 0 else 0
        end_ind = len(self.param_list) - 1

        logger.info("Linear Model Search from:{} to:{} [total #: {}]".format(
            utils.filebase(self.param_list[index]),
            utils.filebase(self.param_list[end_ind]),
            len(self.param_list) - 1))

        # kinda hacky, but it'll do for now.
        increment_amount = int(np.round(min(max(10**(np.log10(
            len(self.param_list)) - 1), 1), 25)))

        while index < end_ind:
            logger.info("Evaluating {}".format(
                utils.filebase(self.param_list[index])))

            if index not in results:
                model = self.param_list[index]
                results[index] = self.evaluate_model(model)

            index += increment_amount

        results_df = pandas.DataFrame.from_dict(results, orient='index')
        selected_index = results_df['f1_weighted'].idxmax()

        # Report the selected model (the one with the highest weighted F1).
        logger.info(
            utils.colored("Selected model index:{} / params: {}".format(
                selected_index,
                utils.filebase(self.param_list[selected_index]))))
        logger.info("For reference, here's the model selection results:")
        logger.info("\n{}".format(results_df.to_string()))
        return results_df, results[selected_index]
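
The step size above is roughly one evaluation per ten checkpoints, clamped to the range [1, 25]. A minimal standalone sketch of that heuristic (the n_params values are made up for illustration):

import numpy as np

def linear_search_step(n_params):
    # Same formula as increment_amount above: ~n_params / 10, clamped to [1, 25].
    return int(np.round(min(max(10**(np.log10(n_params) - 1), 1), 25)))

for n_params in (5, 50, 500, 5000):
    print(n_params, linear_search_step(n_params))  # -> 1, 5, 25, 25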
Example #2
    def evaluate_model(self, params_file):
        """Evaluate a model as defined by a params file, returning
        a single value (mean loss by default) to compare over the validation
        set."""
        model = hcnn.train.models.NetworkManager.deserialize_npz(
            params_file)
        # Results contain one row per item across the whole validation set.
        logger.debug("Evaluating params {}".format(params_file))
        validation_predictions_df = predict.predict_many(
            self.valid_df, model, self.slicer_fx, self.t_len,
            show_progress=True).dropna()

        evaluation_results = pandas.Series({
            "mean_loss": validation_predictions_df['loss'].mean(),
            "mean_acc": sklearn.metrics.accuracy_score(
                validation_predictions_df['y_true'].astype(int),
                validation_predictions_df['y_pred'].astype(int)),
            "f1_weighted": sklearn.metrics.f1_score(
                validation_predictions_df['y_true'].astype(int),
                validation_predictions_df['y_pred'].astype(int),
                average='weighted')
        })
        # Include the metadata in the series.
        model_iteration = utils.filebase(params_file)[6:]
        model_iteration = int(model_iteration) if model_iteration.isdigit() \
            else model_iteration

        return pandas.concat([evaluation_results, pandas.Series({
            "model_file": params_file,
            "model_iteration": model_iteration
        })])
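
A self-contained sketch of the metrics evaluate_model collects, computed on a toy predictions frame (the values below are made up; predict.predict_many is assumed to yield 'loss', 'y_true' and 'y_pred' columns as used above):

import pandas
import sklearn.metrics

toy_predictions = pandas.DataFrame({
    "y_true": [0, 1, 1, 2, 2],
    "y_pred": [0, 1, 0, 2, 2],
    "loss":   [0.10, 0.25, 0.90, 0.15, 0.30],
})

metrics = pandas.Series({
    "mean_loss": toy_predictions["loss"].mean(),
    "mean_acc": sklearn.metrics.accuracy_score(
        toy_predictions["y_true"], toy_predictions["y_pred"]),
    "f1_weighted": sklearn.metrics.f1_score(
        toy_predictions["y_true"], toy_predictions["y_pred"],
        average="weighted"),
})
print(metrics)  # mean_loss 0.34, mean_acc 0.8, f1_weighted 0.8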
Example #3
    def model_search(self):
        """Do a model search with binary search.

        Returns
        -------
        results : pandas.DataFrame
        selected_model : dict or pandas.Series
            Contains the keys:
                model_file
                model_iteration
                mean_loss
                ...
        """
        results = {}
        # Don't allow the zero index; We should never select an
        #  untrained model!
        start_ind = 1 if len(self.param_list) > 0 else 0
        end_ind = len(self.param_list) - 1
        # start_ind = len(self.param_list)/2
        # end_ind = start_ind
        while start_ind != end_ind:
            logger.info("Model Search - L:{} R:{}".format(
                utils.filebase(self.param_list[start_ind]),
                utils.filebase(self.param_list[end_ind])))
            if start_ind not in results:
                model = self.param_list[start_ind]
                results[start_ind] = self.evaluate_model(model)
            if end_ind not in results:
                model = self.param_list[end_ind]
                results[end_ind] = self.evaluate_model(model)
            best_model = self.compare_models(
                results[start_ind], results[end_ind])

            new_ind = int(np.round((end_ind + start_ind) / 2))
            if (end_ind - start_ind) > 1:
                start_ind, end_ind = (new_ind, end_ind) if best_model >= 0 \
                    else (start_ind, new_ind)
            else:
                start_ind, end_ind = (new_ind, new_ind)

        logger.info("Selected model {} / {}".format(
            start_ind, self.param_list[start_ind]))
        return pandas.DataFrame.from_dict(results, orient='index'), \
            results[start_ind]
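
The search above compares the two endpoints and keeps the half next to the better one, so it only converges on the best checkpoint when validation quality is roughly unimodal over training iterations. A standalone sketch of the same interval-narrowing pattern on a made-up score list, where compare(left, right) >= 0 plays the role of compare_models favouring the right endpoint:

import numpy as np

def narrow(scores, compare):
    # Skip index 0, as above: never select an untrained model.
    start_ind = 1 if len(scores) > 0 else 0
    end_ind = len(scores) - 1
    while start_ind != end_ind:
        new_ind = int(np.round((end_ind + start_ind) / 2))
        if (end_ind - start_ind) > 1:
            # Keep the half adjacent to the better endpoint.
            start_ind, end_ind = (new_ind, end_ind) \
                if compare(scores[start_ind], scores[end_ind]) >= 0 \
                else (start_ind, new_ind)
        else:
            start_ind, end_ind = (new_ind, new_ind)
    return start_ind

scores = [0.0, 0.2, 0.5, 0.7, 0.8, 0.75, 0.6]  # made-up validation scores
print(narrow(scores, lambda left, right: right - left))  # -> 4 (the peak)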
Example #4
def test_filebase():
    fnames = ['y', 'y.z', 'x/y.z', 'x.y.z']
    results = ['y', 'y', 'y', 'x.y']
    for fn, res in zip(fnames, results):
        yield __eq, utils.filebase(fn), res
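
utils.filebase itself is not shown on this page; an implementation consistent with the expectations in this test (drop the directory and only the final extension) could look like the following sketch:

import os

def filebase(path):
    # Drop the directory and the last extension only: 'x.y.z' -> 'x.y'.
    return os.path.splitext(os.path.basename(path))[0]

assert [filebase(p) for p in ['y', 'y.z', 'x/y.z', 'x.y.z']] == ['y', 'y', 'y', 'x.y']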
Example #5
def features_path_for_audio(audio_path):
    return os.path.join(write_dir,
                        utils.filebase(audio_path) + ".npz")