Пример #1
0
def main(base_dir, base_filename):
    """
        Assemble a small nested ResultsContainer filled with dummy classifier results.

        A one-axis container (axis "classifiers" with labels "svm" and "dt") holding
            two ClassifierResult objects with accuracy scores 0.8 and 0.95 is built
            first. That container plus three deep copies of it are handed to
            `add_dim_layer` under the axis name "cv_folds"; three deep copies of the
            outcome are then handed to `add_dim_layer` under "fenotype_features".

        @param base_dir: first constructor argument of the returned container
        @param base_filename: second constructor argument of the returned container

        @return: the outermost container
        @rtype: ResultsContainer
    """
    # Innermost layer: one slot per classifier.
    by_classifier = ResultsContainer("", "")
    by_classifier.ar = np.empty(shape=(2,), dtype=object)
    by_classifier.axis_list = ["classifiers"]
    by_classifier.labels_dict["classifiers"] = ["svm", "dt"]

    from environment.structures import ClassifierResult
    dummy_results = []
    for accuracy in (0.8, 0.95):
        dummy = ClassifierResult("", "")
        dummy.scores["accuracy"] = accuracy
        dummy_results.append(dummy)

    for slot, dummy in enumerate(dummy_results):
        by_classifier.ar[slot] = dummy

    # Middle layer: the original container followed by three independent copies.
    fold_layers = [by_classifier]
    fold_layers.extend(copy.deepcopy(by_classifier) for _ in range(3))

    by_fold = ResultsContainer("", "")
    by_fold.add_dim_layer(fold_layers, "cv_folds", ["f1", "f2", "f3", "f4"])

    # Outer layer: only copies are used here, `by_fold` itself is not included.
    feature_layers = [copy.deepcopy(by_fold) for _ in range(3)]

    by_feature = ResultsContainer(base_dir, base_filename)
    by_feature.add_dim_layer(
        feature_layers, "fenotype_features", ["age", "sex", "tissue"])

    return by_feature
Пример #2
0
    def aggregate_prediction_vectors(self, axis_list_to_preserve):
        """
            Collapse every axis that is not listed in `axis_list_to_preserve` by joining
                the `y_true` and `y_predicted` vectors of the ClassifierResult objects
                found along the dropped axes. The axes of the produced array follow the
                order given in `axis_list_to_preserve`.

            When the number of preserved axes equals the number of axes of the
                container nothing is merged: `self.ar` is only transposed into the
                requested axis order and its original elements are returned.

            @param axis_list_to_preserve: names of the axes to keep, in the desired order

            @return: Array of len(axis_list_to_preserve) dimensions; on the merging path
                each filled element is a new ClassifierResult with joined
                y_true and y_predicted vectors
            @rtype: np.array
        """
        # Fast path: same number of axes, so only a reordering is needed.
        if len(axis_list_to_preserve) == len(self.axis_list):
            axis_order = [
                self.axis_list.index(name) for name in axis_list_to_preserve
            ]
            return np.transpose(self.ar, axis_order)

        label_sets = [self.labels_dict[name] for name in axis_list_to_preserve]
        aggregated = np.empty(
            shape=tuple(len(labels) for labels in label_sets), dtype=object)

        # Visit every combination of labels on the preserved axes.
        for label_combo in product(*label_sets):
            named_labels = list(zip(axis_list_to_preserve, label_combo))

            mask = np.array(self.build_axis_mask(dict(named_labels)))
            target_index = tuple(
                self.inverse_labels_dict[name][label]
                for name, label in named_labels)

            chunk = self.ar[tuple(mask)]
            if not hasattr(chunk, 'flatten'):
                # Not array-like: the matching result cell is left untouched.
                continue

            merged = ClassifierResult("", "")
            merged.classifier = "aggregated_result"
            for item in chunk.flatten():
                if item is None:
                    # Empty cells contribute nothing.
                    continue
                merged.y_true.extend(item.y_true)
                merged.y_predicted.extend(item.y_predicted)

            aggregated[target_index] = merged

        return aggregated
Пример #3
0
def apply_classifier(exp,
                     block,
                     train_es,
                     test_es,
                     classifier_name,
                     classifier_options=None,
                     fit_options=None,
                     base_folder="/tmp",
                     base_filename="cl"):
    """
        Fit the classifier registered under `classifier_name` on `train_es`, predict
            the classes of `test_es` and wrap the outcome into a ClassifierResult.

        @param exp: not used by this function
        @param block: not used by this function
        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @param classifier_name: key into `classifiers_map`
        @param classifier_options: keyword arguments for the classifier factory;
            any falsy value is treated as "no options"
        @param fit_options: extra keyword arguments for the classifier's `fit`;
            any falsy value is treated as "no options"
        @param base_folder: first constructor argument of the ClassifierResult
        @param base_filename: second constructor argument of the ClassifierResult

        @return: pair of a one-element list holding the ClassifierResult and an empty dict
        @rtype: tuple
        @raise NotImplementedError: if the `classifiers_map` entry carries a custom
            apply function
    """
    classifier_options = classifier_options or {}
    fit_options = fit_options or {}

    target_class_column = train_es.pheno_metadata["user_class_title"]

    # Unpack data.
    # `.values` is used instead of `as_matrix()`: the latter was deprecated in
    #   pandas 0.23 and removed in 1.0, while `.values` exists in every release.
    # The assay frame is transposed before being handed to the classifier.
    x_train = train_es.get_assay_data_frame().values.transpose()
    y_train = train_es.get_pheno_data_frame()[target_class_column].values

    x_test = test_es.get_assay_data_frame().values.transpose()
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as a target classes
    #   so we need to preprocess labels. The encoder is fitted on the training
    #   labels only and then applied to both partitions.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)

    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    log.debug("Classifier options: %s", classifier_options)
    if apply_func is not None:
        # Entries with a custom apply function are not supported.
        raise NotImplementedError()

    cl = fabric(**classifier_options)
    cl.fit(x_train, y_train_fixed, **fit_options)

    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)

    cr.labels_encode_vector = le.classes_  # Store target class labels

    # Both vectors hold encoded labels, not the original class names.
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted

    cr.classifier = classifier_name
    cr.store_model(cl)
    return [cr], {}
Пример #4
0
    def aggregate_prediction_vectors(self, axis_list_to_preserve):
        """
            Build a new np.array by merging the `y_true` and `y_predicted` fields of the
                ClassifierResult objects along every axis that is absent from
                `axis_list_to_preserve`. The new array is laid out to comply with the
                axis order of `axis_list_to_preserve`.

            If as many axes are preserved as the container has, no merging takes
                place and a transposed view of `self.ar` is returned instead.

            @param axis_list_to_preserve: axis names to keep, in the wanted order

            @return: Array of len(axis_list_to_preserve) dimensions; on the merging path
                each filled element [ClassifierResult] has joined y_true and
                y_predicted vectors
            @rtype: np.array
        """
        if len(self.axis_list) == len(axis_list_to_preserve):
            # Pure reordering of the existing axes.
            permutation = []
            for axis in axis_list_to_preserve:
                permutation.append(self.axis_list.index(axis))
            return np.transpose(self.ar, permutation)

        # Collect the labels of each preserved axis together with its size.
        labels_per_axis = []
        dims = []
        for axis in axis_list_to_preserve:
            axis_labels = self.labels_dict[axis]
            labels_per_axis.append(axis_labels)
            dims.append(len(axis_labels))

        out = np.empty(shape=tuple(dims), dtype=object)

        for combo in product(*labels_per_axis):
            selector = {}
            position = []
            for axis, label in zip(axis_list_to_preserve, combo):
                selector[axis] = label
                position.append(self.inverse_labels_dict[axis][label])

            mask = np.array(self.build_axis_mask(selector))
            selection = self.ar[tuple(mask)]

            # Only array-like selections are merged; anything else leaves
            #   the corresponding output cell as created by np.empty.
            if hasattr(selection, 'flatten'):
                joined = ClassifierResult("", "")
                joined.classifier = "aggregated_result"

                present = [cr for cr in selection.flatten() if cr is not None]
                for cr in present:
                    joined.y_true.extend(cr.y_true)
                    joined.y_predicted.extend(cr.y_predicted)

                out[tuple(position)] = joined

        return out
Пример #5
0
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
        Train one classifier on the train partition and evaluate it on the test
            partition.

        The class labels are read from the pheno data column named by
            `train_es.pheno_metadata["user_class_title"]` (the same column name is
            used for `test_es`) and are encoded with a LabelEncoder fitted on the
            training labels.

        @param exp: not used by this function
        @param block: not used by this function
        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @param classifier_name: key of the classifier in `classifiers_map`
        @param classifier_options: keyword arguments passed to the classifier
            factory; falsy means no options
        @param fit_options: keyword arguments passed to `fit` in addition to the
            data; falsy means no options
        @param base_folder: passed through to the ClassifierResult constructor
        @param base_filename: passed through to the ClassifierResult constructor

        @return: ([ClassifierResult], {})
        @rtype: tuple
        @raise NotImplementedError: if a custom apply function is registered for
            the classifier
    """
    if not classifier_options:
        classifier_options = {}
    if not fit_options:
        fit_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]

    # Unpack data
    # NOTE: `as_matrix()` no longer exists in pandas >= 1.0 (deprecated since
    #   0.23); `.values` is the replacement available in all pandas versions.
    train_frame = train_es.get_assay_data_frame()
    x_train = train_frame.values.transpose()
    y_train = train_es.get_pheno_data_frame()[target_class_column].values

    test_frame = test_es.get_assay_data_frame()
    x_test = test_frame.values.transpose()
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as a target classes
    #   so we need to preprocess labels
    le = preprocessing.LabelEncoder()
    le.fit(y_train)

    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    log.debug("Classifier options: %s", classifier_options)
    if apply_func is not None:
        # Only plain factory-built classifiers are handled here.
        raise NotImplementedError()

    cl = fabric(**classifier_options)
    cl.fit(x_train, y_train_fixed, **fit_options)

    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)

    cr.labels_encode_vector = le.classes_  # Store target class labels

    # True and predicted labels are stored in their encoded form.
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted

    cr.classifier = classifier_name
    cr.store_model(cl)
    return [cr], {}
Пример #6
0
def main(base_dir, base_filename):
    """
        Create a demo ResultsContainer made of three nested layers of dummy data.

        Layers, from the inside out:
            - "classifiers" ("svm", "dt"): two ClassifierResult objects whose
                "accuracy" scores are 0.8 and 0.95;
            - "cv_folds" ("f1".."f4"): the classifier container and three deep
                copies of it, passed to `add_dim_layer`;
            - "fenotype_features" ("age", "sex", "tissue"): three deep copies of
                the fold container, passed to `add_dim_layer`.

        @param base_dir: first constructor argument of the returned container
        @param base_filename: second constructor argument of the returned container

        @return: the container that received the "fenotype_features" layer
        @rtype: ResultsContainer
    """
    def replicate(container, copies):
        # Independent deep copies, so the layers do not share objects.
        return [copy.deepcopy(container) for _ in range(copies)]

    leaf = ResultsContainer("", "")
    leaf.ar = np.empty(shape=(2, ), dtype=object)
    leaf.axis_list = ["classifiers"]
    leaf.labels_dict["classifiers"] = ["svm", "dt"]

    from environment.structures import ClassifierResult
    svm_result = ClassifierResult("", "")
    svm_result.scores["accuracy"] = 0.8
    dt_result = ClassifierResult("", "")
    dt_result.scores["accuracy"] = 0.95

    leaf.ar[0], leaf.ar[1] = svm_result, dt_result

    # The first fold is the original leaf, the other three are copies.
    folds = [leaf] + replicate(leaf, 3)
    fold_names = ["f1", "f2", "f3", "f4"]

    folded = ResultsContainer("", "")
    folded.add_dim_layer(folds, "cv_folds", fold_names)

    per_feature = replicate(folded, 3)
    feature_names = ["age", "sex", "tissue"]

    top = ResultsContainer(base_dir, base_filename)
    top.add_dim_layer(per_feature, "fenotype_features", feature_names)

    return top
Пример #7
0
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
        Fit the classifier registered under `classifier_name` on `train_es`, predict
            the classes of `test_es` and return the outcome as a ClassifierResult.

        The test assay frame is re-indexed with the columns of the train assay
            frame, so both matrices share the same column order.

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @type exp: Experiment
        @type block: GenericBlock
        @param exp: not used by this function
        @param block: forwarded to `get_classifier`
        @param classifier_name: key into `classifiers_map`; also forwarded to
            `get_classifier`
        @param classifier_options: forwarded to `get_classifier`; any falsy value
            is replaced by an empty dict
        @param fit_options: accepted for interface compatibility only; it is
            NOT passed to `fit`
        @param base_folder: first constructor argument of the ClassifierResult
        @param base_filename: second constructor argument of the ClassifierResult

        @return: ([ClassifierResult], {})
        @rtype: tuple
        @raise NotImplementedError: if the `classifiers_map` entry carries a custom
            apply function
    """
    if settings.CELERY_DEBUG:
        import sys
        pydevd_egg = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        # This function runs once per task; without the guard every call
        #   would append another duplicate entry to sys.path.
        if pydevd_egg not in sys.path:
            sys.path.append(pydevd_egg)
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    if not classifier_options:
        classifier_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]
    tr_es = train_es.get_assay_data_frame()
    cols = tr_es.columns

    # Select the train columns from the test frame, in the train order.
    te_es = test_es.get_assay_data_frame()[list(cols)]

    # Unpack data
    # `.values` replaces `as_matrix()`, which was deprecated in pandas 0.23 and
    #   removed in 1.0; `.values` is available in every pandas release.
    # NOTE(review): unlike an earlier revision the frames are not transposed
    #   here, so rows are presumably samples already - confirm with the caller.
    x_train = tr_es.values
    y_train = train_es.get_pheno_data_frame()[target_class_column].values

    x_test = te_es.values
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as a target classes
    #   so we need to preprocess labels
    le = preprocessing.LabelEncoder()
    le.fit(y_train)

    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    if apply_func is not None:
        # Entries with a custom apply function are not supported.
        raise NotImplementedError()

    cl = get_classifier(fabric, classifier_options, classifier_name, block)
    log.debug("Fitting classifier.")
    cl.fit(x_train, y_train_fixed)
    log.debug("Finished fitting classifier.")

    log.debug("Applying on test.")
    # Applying on test partition
    y_test_predicted = cl.predict(x_test)
    log.debug("Building result.")
    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)

    log.debug("Storing labels.")
    cr.labels_encode_vector = le.classes_  # Store target class labels

    log.debug("Storing y.")
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted

    cr.classifier = classifier_name
    log.debug("Storing model.")
    # TODO Why to store model?
    # The fitted model is currently NOT stored: `cr.store_model(cl)` is disabled.
    log.debug("Finished apply_classifier.")
    return [cr], {}
Пример #8
0
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
        Train one classifier on the train partition, apply it to the test
            partition and return the outcome as a ClassifierResult.

        The test assay frame is restricted to (and ordered by) the columns of the
            train assay frame before prediction. The training matrix is written to
            the debug log before fitting and once more if `fit` raises ValueError.

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @type exp: Experiment
        @type block: GenericBlock
        @param exp: not used by this function
        @param block: forwarded to `get_classifier`
        @param classifier_name: key into `classifiers_map`; also forwarded to
            `get_classifier`
        @param classifier_options: forwarded to `get_classifier`; any falsy value
            is replaced by an empty dict
        @param fit_options: accepted for interface compatibility only; it is
            NOT passed to `fit`
        @param base_folder: first constructor argument of the ClassifierResult
        @param base_filename: second constructor argument of the ClassifierResult

        @return: ([ClassifierResult], {})
        @rtype: tuple
        @raise NotImplementedError: if the `classifiers_map` entry carries a custom
            apply function
        @raise ValueError: re-raised unchanged when `fit` rejects the training data
    """
    if settings.CELERY_DEBUG:
        import sys
        pydevd_egg = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        # Avoid piling up duplicate sys.path entries over repeated calls.
        if pydevd_egg not in sys.path:
            sys.path.append(pydevd_egg)
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    if not classifier_options:
        classifier_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]
    tr_es = train_es.get_assay_data_frame()
    cols = tr_es.columns

    # Select the train columns from the test frame, in the train order.
    te_es = test_es.get_assay_data_frame()[list(cols)]

    # Unpack data
    # `.values` replaces `as_matrix()`, which was deprecated in pandas 0.23 and
    #   removed in 1.0; `.values` is available in every pandas release.
    # NOTE(review): unlike an earlier revision the frames are not transposed
    #   here, so rows are presumably samples already - confirm with the caller.
    x_train = tr_es.values
    y_train = train_es.get_pheno_data_frame()[target_class_column].values

    x_test = te_es.values
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as a target classes
    #   so we need to preprocess labels
    le = preprocessing.LabelEncoder()
    le.fit(y_train)

    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    if apply_func is not None:
        # Entries with a custom apply function are not supported.
        raise NotImplementedError()

    cl = get_classifier(fabric, classifier_options, classifier_name, block)
    log.debug("Fitting classifier.")
    # Lazy %-formatting: the (possibly large) matrix is only rendered to
    #   text when debug logging is actually enabled.
    log.debug("%s", x_train)
    try:
        cl.fit(x_train, y_train_fixed)
    except ValueError:
        # Dump the offending training matrix again next to the failure,
        #   then let the error propagate to the caller.
        log.debug("%s", x_train)
        raise
    log.debug("Finished fitting classifier.")

    log.debug("Applying on test.")
    # Applying on test partition
    y_test_predicted = cl.predict(x_test)
    log.debug("Building result.")
    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)

    log.debug("Storing labels.")
    cr.labels_encode_vector = le.classes_  # Store target class labels

    log.debug("Storing y.")
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted

    cr.classifier = classifier_name
    log.debug("Storing model.")
    # TODO Why to store model?
    # The fitted model is currently NOT stored: `cr.store_model(cl)` is disabled.
    log.debug("Finished apply_classifier.")
    return [cr], {}