def main(base_dir, base_filename):
    """
    Build a small demo ResultsContainer nested over three axes.

    The innermost container holds two ClassifierResult objects on the
    "classifiers" axis (labels "svm" and "dt", accuracy scores 0.8 and 0.95).
    It is wrapped with `add_dim_layer` under "cv_folds" (four entries) and
    then under "fenotype_features" (three entries).

    @param base_dir: passed to the outermost ResultsContainer
    @param base_filename: passed to the outermost ResultsContainer
    @return: the outermost container
    @rtype: ResultsContainer
    """
    classifiers_rc = ResultsContainer("", "")
    classifiers_rc.ar = np.empty(shape=(2,), dtype=object)
    classifiers_rc.axis_list = ["classifiers"]
    classifiers_rc.labels_dict["classifiers"] = ["svm", "dt"]

    from environment.structures import ClassifierResult

    # One result per classifier label, in label order.
    for position, accuracy in enumerate((0.8, 0.95)):
        result = ClassifierResult("", "")
        result.scores["accuracy"] = accuracy
        classifiers_rc.ar[position] = result

    # The first fold is the container itself, the other three are deep copies.
    fold_labels = ["f1", "f2", "f3", "f4"]
    fold_members = [classifiers_rc]
    fold_members.extend(copy.deepcopy(classifiers_rc) for _ in fold_labels[1:])
    folds_rc = ResultsContainer("", "")
    folds_rc.add_dim_layer(fold_members, "cv_folds", fold_labels)

    # Every member of the outer layer is an independent deep copy.
    feature_labels = ["age", "sex", "tissue"]
    feature_members = [copy.deepcopy(folds_rc) for _ in feature_labels]
    features_rc = ResultsContainer(base_dir, base_filename)
    features_rc.add_dim_layer(feature_members, "fenotype_features", feature_labels)
    return features_rc
def aggregate_prediction_vectors(self, axis_list_to_preserve):
    """
    Merge the `y_true` / `y_predicted` vectors of the stored
    ClassifierResult objects over every axis that is not listed in
    `axis_list_to_preserve`.

    When as many axes are requested as the container has, nothing is
    merged and `self.ar` is only transposed into the requested axis order.

    @param axis_list_to_preserve: axis names to keep, in the desired order
    @return: Array with one dimension per preserved axis; each filled cell
        is a ClassifierResult holding the joined y_true and y_predicted
        vectors
    @rtype: np.array
    """
    kept_axes = axis_list_to_preserve
    if len(kept_axes) == len(self.axis_list):
        axis_order = [self.axis_list.index(axis) for axis in kept_axes]
        return np.transpose(self.ar, axis_order)

    labels_per_axis = [self.labels_dict[axis] for axis in kept_axes]
    result = np.empty(
        shape=tuple(len(labels) for labels in labels_per_axis),
        dtype=object)

    for labels in product(*labels_per_axis):
        # presumably build_axis_mask turns {axis: label} into one index
        # entry per axis of self.ar -- confirm against its definition
        axis_to_label = dict(zip(kept_axes, labels))
        mask = np.array(self.build_axis_mask(axis_to_label))
        target_cell = tuple(
            self.inverse_labels_dict[axis][label]
            for axis, label in zip(kept_axes, labels))

        selection = self.ar[tuple(mask)]
        if not hasattr(selection, 'flatten'):
            # Nothing array-like to merge; the cell keeps its initial None.
            continue

        merged = ClassifierResult("", "")
        merged.classifier = "aggregated_result"
        for cr in selection.flatten():
            if cr is None:
                continue
            merged.y_true.extend(cr.y_true)
            merged.y_predicted.extend(cr.y_predicted)
        result[target_cell] = merged

    return result
def apply_classifier(exp, block, train_es, test_es, classifier_name,
                     classifier_options=None, fit_options=None,
                     base_folder="/tmp", base_filename="cl"):
    """
    Fit the classifier registered under `classifier_name` on `train_es`,
    predict the classes of `test_es` and wrap the outcome in a
    ClassifierResult.

    @type train_es: ExpressionSet
    @type test_es: ExpressionSet
    @param exp: not used by this function
    @param block: not used by this function
    @param classifier_name: key into `classifiers_map`
    @param classifier_options: keyword arguments for the classifier
        factory; a falsy value means "no options"
    @param fit_options: extra keyword arguments for `fit`; a falsy value
        means "no options"
    @param base_folder: passed to the ClassifierResult constructor
    @param base_filename: passed to the ClassifierResult constructor
    @return: a one-element list with the ClassifierResult, and an empty dict
    @raise NotImplementedError: when the `classifiers_map` entry carries a
        custom apply function (second tuple element is not None)
    """
    classifier_options = classifier_options or {}
    fit_options = fit_options or {}

    target_class_column = train_es.pheno_metadata["user_class_title"]

    # Unpack data.  `.values` replaces `as_matrix()`, which pandas deprecated
    # in 0.23 and removed in 1.0; it yields the same array on old versions.
    x_train = train_es.get_assay_data_frame().values.transpose()
    y_train = train_es.get_pheno_data_frame()[target_class_column].values
    x_test = test_es.get_assay_data_frame().values.transpose()
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    # so the labels are encoded first.  The encoder is fitted on the training
    # labels only and then applied to both partitions.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    log.debug("Classifier options: %s", classifier_options)
    if apply_func is not None:
        # Custom apply functions are not supported yet.
        raise NotImplementedError()
    cl = fabric(**classifier_options)
    cl.fit(x_train, y_train_fixed, **fit_options)

    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)
    cr.labels_encode_vector = le.classes_  # Store target class labels
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted
    cr.classifier = classifier_name
    cr.store_model(cl)
    return [cr], {}
def aggregate_prediction_vectors(self, axis_list_to_preserve):
    """
    Join `y_true` and `y_predicted` of the stored ClassifierResult objects
    across all axes that are absent from `axis_list_to_preserve`.

    If the number of requested axes equals the number of axes of the
    container, no merging happens: `self.ar` is returned transposed into
    the requested axis order.

    @param axis_list_to_preserve: names of the axes to keep, in output order
    @return: Array of len(axis_list_to_preserve) dimensions; each filled
        element is a ClassifierResult with joined y_true and y_predicted
        vectors
    @rtype: np.array
    """
    preserved = axis_list_to_preserve

    if len(preserved) == len(self.axis_list):
        return np.transpose(
            self.ar, [self.axis_list.index(name) for name in preserved])

    label_lists = [self.labels_dict[name] for name in preserved]
    out_shape = tuple([len(label_list) for label_list in label_lists])
    result = np.empty(shape=out_shape, dtype=object)

    for label_combo in product(*label_lists):
        named_labels = list(zip(preserved, label_combo))

        # presumably the mask has one index entry per axis of self.ar --
        # verify against build_axis_mask
        source_key = tuple(np.array(self.build_axis_mask(dict(named_labels))))
        result_key = tuple([
            self.inverse_labels_dict[name][label]
            for name, label in named_labels
        ])

        chunk = self.ar[source_key]
        if hasattr(chunk, 'flatten'):
            aggregated = ClassifierResult("", "")
            aggregated.classifier = "aggregated_result"
            # Empty (None) cells contribute nothing to the joined vectors.
            for item in (cell for cell in chunk.flatten() if cell is not None):
                aggregated.y_true.extend(item.y_true)
                aggregated.y_predicted.extend(item.y_predicted)
            result[result_key] = aggregated

    return result
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
    Train the classifier named `classifier_name` on `train_es` and
    evaluate it on `test_es`.

    @type train_es: ExpressionSet
    @type test_es: ExpressionSet
    @param exp: not used by this function
    @param block: not used by this function
    @param classifier_name: key into `classifiers_map`
    @param classifier_options: keyword arguments for the classifier
        factory (falsy -> none)
    @param fit_options: extra keyword arguments for `fit` (falsy -> none)
    @param base_folder: passed to the ClassifierResult constructor
    @param base_filename: passed to the ClassifierResult constructor
    @return: tuple of (list containing the single ClassifierResult,
        empty dict)
    @raise NotImplementedError: if the `classifiers_map` entry defines a
        custom apply function
    """
    if not classifier_options:
        classifier_options = {}
    if not fit_options:
        fit_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]
    train_pheno = train_es.get_pheno_data_frame()
    test_pheno = test_es.get_pheno_data_frame()

    # Unpack data.
    # NOTE: `as_matrix()` no longer exists in pandas >= 1.0 (deprecated since
    # 0.23); `.values` returns the same array and works on old versions too.
    x_train = train_es.get_assay_data_frame().values.transpose()
    y_train = train_pheno[target_class_column].values
    x_test = test_es.get_assay_data_frame().values.transpose()
    y_test = test_pheno[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    # so we need to preprocess labels.  The encoder only knows the training
    # labels.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    log.debug("Classifier options: %s", classifier_options)
    if apply_func is None:
        cl = fabric(**classifier_options)
        cl.fit(x_train, y_train_fixed, **fit_options)
    else:
        # Entries with a custom apply function are not handled yet.
        raise NotImplementedError()

    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)
    cr.labels_encode_vector = le.classes_  # Store target class labels
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted
    cr.classifier = classifier_name
    cr.store_model(cl)
    return [cr], {}
def main(base_dir, base_filename):
    """
    Assemble a sample ResultsContainer with three nested axes.

    Layers, innermost first: "classifiers" (labels "svm", "dt", each a
    ClassifierResult with an "accuracy" score), "cv_folds" (four entries)
    and "fenotype_features" (three entries).  The outer layers are added
    with `add_dim_layer`.

    @param base_dir: forwarded to the outermost ResultsContainer
    @param base_filename: forwarded to the outermost ResultsContainer
    @return: the outermost container
    @rtype: ResultsContainer
    """
    accuracy_by_position = [0.8, 0.95]

    inner = ResultsContainer("", "")
    inner.ar = np.empty(shape=(2, ), dtype=object)
    inner.axis_list = ["classifiers"]
    inner.labels_dict["classifiers"] = ["svm", "dt"]

    from environment.structures import ClassifierResult

    classifier_results = []
    for accuracy in accuracy_by_position:
        classifier_result = ClassifierResult("", "")
        classifier_result.scores["accuracy"] = accuracy
        classifier_results.append(classifier_result)
    for slot, classifier_result in enumerate(classifier_results):
        inner.ar[slot] = classifier_result

    # Fold layer: the inner container itself plus three deep copies of it.
    per_fold = [inner]
    while len(per_fold) < 4:
        per_fold.append(copy.deepcopy(inner))
    middle = ResultsContainer("", "")
    middle.add_dim_layer(per_fold, "cv_folds", ["f1", "f2", "f3", "f4"])

    # Feature layer: three deep copies of the fold container.
    per_feature = [copy.deepcopy(middle) for _ in range(3)]
    outer = ResultsContainer(base_dir, base_filename)
    outer.add_dim_layer(
        per_feature, "fenotype_features", ["age", "sex", "tissue"])
    return outer
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
    Fit the classifier registered under `classifier_name` on `train_es`,
    predict the classes of `test_es` and return the outcome as a
    ClassifierResult.

    The test assay frame is re-indexed with the training frame's columns,
    so both matrices share the same column order.

    @type train_es: ExpressionSet
    @type test_es: ExpressionSet
    @type exp: Experiment
    @type block: GenericBlock
    @param classifier_name: key into `classifiers_map`
    @param classifier_options: options handed to `get_classifier`; a falsy
        value is replaced by an empty dict
    @param fit_options: normalised to a dict but not used afterwards
    @param base_folder: passed to the ClassifierResult constructor
    @param base_filename: passed to the ClassifierResult constructor
    @return: a one-element list with the ClassifierResult, and an empty dict
    @raise NotImplementedError: when the `classifiers_map` entry carries a
        custom apply function (second tuple element is not None)
    """
    if settings.CELERY_DEBUG:
        # Developer hook: attach to a remote pydevd debugger.  The egg path
        # is specific to one development machine.
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    if not classifier_options:
        classifier_options = {}
    # NOTE(review): fit_options is never passed to `fit` below -- confirm
    # whether that is intentional.
    if not fit_options:
        fit_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]
    tr_es = train_es.get_assay_data_frame()
    cols = tr_es.columns
    # Select the test columns in the training column order.
    te_es = test_es.get_assay_data_frame()[list(cols)]

    # Unpack data.  `.values` replaces `as_matrix()`, which pandas deprecated
    # in 0.23 and removed in 1.0; it yields the same array on old versions.
    x_train = tr_es.values
    y_train = train_es.get_pheno_data_frame()[target_class_column].values
    x_test = te_es.values
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    # so we need to preprocess labels.  The encoder is fitted on the training
    # labels only.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    if apply_func is not None:
        # Custom apply functions are not supported yet.
        raise NotImplementedError()
    cl = get_classifier(fabric, classifier_options, classifier_name, block)
    log.debug("Fitting classifier.")
    cl.fit(x_train, y_train_fixed)
    log.debug("Finished fitting classifier.")

    log.debug("Applying on test.")
    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    log.debug("Building result.")
    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)
    log.debug("Storing labels.")
    cr.labels_encode_vector = le.classes_  # Store target class labels
    log.debug("Storing y.")
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted
    cr.classifier = classifier_name
    log.debug("Storing model.")
    # TODO Why to store model?  (the cr.store_model(cl) call is disabled)
    log.debug("Finished apply_classifier.")
    return [cr], {}
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
    Train the classifier named `classifier_name` on `train_es`, predict
    the classes of `test_es` and package the outcome as a ClassifierResult.

    The test assay frame is restricted to the training frame's columns, in
    the same order.  The training matrix is written to the debug log before
    fitting and again if `fit` raises ValueError.

    @type train_es: ExpressionSet
    @type test_es: ExpressionSet
    @type exp: Experiment
    @type block: GenericBlock
    @param classifier_name: key into `classifiers_map`
    @param classifier_options: options handed to `get_classifier`; a falsy
        value is replaced by an empty dict
    @param fit_options: normalised to a dict but not used afterwards
    @param base_folder: passed to the ClassifierResult constructor
    @param base_filename: passed to the ClassifierResult constructor
    @return: tuple of (list containing the single ClassifierResult,
        empty dict)
    @raise NotImplementedError: if the `classifiers_map` entry defines a
        custom apply function
    @raise ValueError: re-raised unchanged when `fit` rejects the data
    """
    if settings.CELERY_DEBUG:
        # Developer hook: attach to a remote pydevd debugger.  The egg path
        # is specific to one development machine.
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    if not classifier_options:
        classifier_options = {}
    # NOTE(review): fit_options is never passed to `fit` below -- confirm
    # whether that is intentional.
    if not fit_options:
        fit_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]
    tr_es = train_es.get_assay_data_frame()
    cols = tr_es.columns
    # Select the test columns in the training column order.
    te_es = test_es.get_assay_data_frame()[list(cols)]

    # Unpack data.
    # NOTE: `as_matrix()` no longer exists in pandas >= 1.0 (deprecated since
    # 0.23); `.values` returns the same array and works on old versions too.
    x_train = tr_es.values
    y_train = train_es.get_pheno_data_frame()[target_class_column].values
    x_test = te_es.values
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    # so we need to preprocess labels.  The encoder only knows the training
    # labels.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    if apply_func is None:
        cl = get_classifier(fabric, classifier_options, classifier_name, block)
        log.debug("Fitting classifier.")
        try:
            log.debug(str(x_train))
            cl.fit(x_train, y_train_fixed)
        except ValueError:
            # Log the offending matrix once more, then let the error
            # propagate to the caller.
            log.debug(str(x_train))
            raise
        log.debug("Finished fitting classifier.")
    else:
        # Entries with a custom apply function are not handled yet.
        raise NotImplementedError()

    log.debug("Applying on test.")
    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    log.debug("Building result.")
    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)
    log.debug("Storing labels.")
    cr.labels_encode_vector = le.classes_  # Store target class labels
    log.debug("Storing y.")
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted
    cr.classifier = classifier_name
    log.debug("Storing model.")
    # TODO Why to store model?  (the cr.store_model(cl) call is disabled)
    log.debug("Finished apply_classifier.")
    return [cr], {}