Пример #1
0
    def compute_overlap_matrix(hp_items: List[HPItem]):
        """Compute the pairwise percentage overlap of the encoders' relevant sequences.

        The encoders of all items are first checked (through ParameterValidator)
        to be SequenceAbundanceEncoder instances. Each encoder's CSV file is then
        loaded once as a set of rows, where every row is reduced to a frozenset of
        its cell values. For each pair of items the overlap is the size of the
        intersection of their row sets, expressed as a percentage of the smaller
        set and rounded to two decimals. Diagonal entries are set to 100.

        Args:
            hp_items: items exposing ``encoder.relevant_sequence_csv_path``, a
                path to a CSV file readable by ``pandas.read_csv``.

        Returns:
            A symmetric numpy array of shape ``(len(hp_items), len(hp_items))``,
            or ``None`` as soon as any of the files yields an empty set.
        """
        ParameterValidator.assert_all_type_and_value(
            [hp_item.encoder for hp_item in hp_items],
            SequenceAbundanceEncoder, 'Overlap matrix computation', 'encoders')

        def import_sequences_as_set(path):
            # frozenset makes each row hashable so rows can be collected in a set.
            return set(
                pd.read_csv(path).apply(frozenset, axis=1).values.tolist())

        # Read every file exactly once, in item order. Re-reading the second
        # file of each pair inside the nested loop would cost O(n^2) CSV parses.
        # NOTE: this keeps all sets in memory at the same time.
        sequence_sets = []
        for hp_item in hp_items:
            sequences = import_sequences_as_set(
                hp_item.encoder.relevant_sequence_csv_path)
            if len(sequences) == 0:
                # An empty set would make the min() denominator below zero.
                return None
            sequence_sets.append(sequences)

        item_count = len(sequence_sets)
        overlap_matrix = np.zeros((item_count, item_count))

        for index1, sequences1 in enumerate(sequence_sets):
            overlap_matrix[index1, index1] = 100
            # Only the upper triangle is computed; the result is mirrored.
            for index2 in range(index1 + 1, item_count):
                sequences2 = sequence_sets[index2]
                intersection = sequences1.intersection(sequences2)
                overlap_matrix[index1, index2] = round(
                    len(intersection) * 100 /
                    min(len(sequences1), len(sequences2)), 2)
                overlap_matrix[index2, index1] = overlap_matrix[index1, index2]

        return overlap_matrix
Пример #2
0
 def _prepare_reports(self, reports: list, symbol_table: SymbolTable) -> dict:
     """Resolve report ids to the report objects stored in the symbol table.

     Args:
         reports: list of report ids, or ``None`` when no reports were given.
         symbol_table: table queried with ``get`` for every report id.

     Returns:
         A dict mapping each report id to the object returned by the symbol
         table, or an empty dict when ``reports`` is ``None``. The resolved
         objects are checked through ParameterValidator to be
         TrainMLModelReport instances.
     """
     if reports is None:
         return {}

     location = TrainMLModelParser.__name__
     ParameterValidator.assert_type_and_value(reports, list, location, "reports")

     resolved = {}
     for report_id in reports:
         resolved[report_id] = symbol_table.get(report_id)

     ParameterValidator.assert_all_type_and_value(resolved.values(), TrainMLModelReport, location, 'reports')
     return resolved
Пример #3
0
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: Path = None) -> SubsamplingInstruction:
        """Validate a subsampling instruction specification and build the instruction.

        Args:
            key: name of the instruction; used as the prefix of the locations
                passed to the validator and as the name of the result.
            instruction: specification with the keys ``type``, ``dataset``,
                ``subsampled_dataset_sizes`` and ``dataset_export_formats``.
            symbol_table: table from which the dataset keys and the dataset
                itself are obtained.
            path: not used by this method.

        Returns:
            A SubsamplingInstruction holding the dataset, the requested sizes
            and the exporter classes resolved from the export format names.
        """
        parser_name = SubsamplingParser.__name__

        expected_keys = ["type", "dataset", "subsampled_dataset_sizes",
                         "dataset_export_formats"]
        ParameterValidator.assert_keys(instruction.keys(), expected_keys,
                                       parser_name, key)

        # The referenced dataset has to be registered in the symbol table.
        known_dataset_keys = symbol_table.get_keys_by_type(SymbolType.DATASET)
        dataset_key = instruction['dataset']
        ParameterValidator.assert_in_valid_list(dataset_key, known_dataset_keys,
                                                parser_name, f'{key}/dataset')
        dataset = symbol_table.get(dataset_key)

        # Sizes: a list of ints checked against the bounds 1 and the number
        # of examples in the dataset.
        sizes = instruction['subsampled_dataset_sizes']
        sizes_location = f'{key}/subsampled_dataset_sizes'
        ParameterValidator.assert_type_and_value(sizes, list, parser_name,
                                                 sizes_location)
        ParameterValidator.assert_all_type_and_value(
            sizes, int, parser_name, sizes_location, 1,
            dataset.get_example_count())

        # Export formats: a list of names taken from the available exporters.
        supported_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(
            DataExporter, 'Exporter', "dataset_export/")
        export_formats = instruction['dataset_export_formats']
        formats_location = f"{key}/dataset_export_formats"
        ParameterValidator.assert_type_and_value(export_formats, list,
                                                 parser_name, formats_location)
        ParameterValidator.assert_all_in_valid_list(export_formats,
                                                    supported_formats,
                                                    parser_name,
                                                    formats_location)

        exporter_classes = []
        for export_format in export_formats:
            exporter_classes.append(
                ReflectionHandler.get_class_by_name(export_format + "Exporter",
                                                    "dataset_export/"))

        return SubsamplingInstruction(dataset=dataset,
                                      subsampled_dataset_sizes=sizes,
                                      dataset_export_formats=exporter_classes,
                                      name=key)
Пример #4
0
    def build_object(cls, **kwargs):
        """Validate the keyword arguments and create a ConfounderAnalysis.

        Args:
            **kwargs: checked against the keys ``metadata_labels`` (a list of
                strings) and ``name`` (a string) through ParameterValidator.

        Returns:
            A ConfounderAnalysis built from the validated values.
        """
        location = ConfounderAnalysis.__name__

        ParameterValidator.assert_keys(kwargs.keys(),
                                       ['metadata_labels', 'name'],
                                       location, location)

        metadata_labels = kwargs['metadata_labels']
        ParameterValidator.assert_type_and_value(metadata_labels, list,
                                                 location, 'metadata_labels')
        ParameterValidator.assert_all_type_and_value(metadata_labels, str,
                                                     location,
                                                     'metadata_labels')

        name = kwargs['name']
        ParameterValidator.assert_type_and_value(name, str, location, 'name')

        return ConfounderAnalysis(metadata_labels=metadata_labels, name=name)
Пример #5
0
    def build_object(cls, **kwargs):
        """Validate the keyword arguments and create a Coefficients object.

        Args:
            **kwargs: must contain ``coefs_to_plot``, an iterable of setting
                names that are upper-cased and checked against the names of
                CoefficientPlottingSetting. ``cutoff`` (a list of numbers) is
                read only when the CUTOFF setting is requested, and
                ``n_largest`` (a list of ints) only when the N_LARGEST setting
                is requested. ``name`` is optional.

        Returns:
            A Coefficients object. ``cutoff`` and ``n_largest`` are empty lists
            when their setting was not requested, and ``name`` is ``None``
            when it was not supplied.
        """
        location = "Coefficients"
        requested = [coef.upper() for coef in kwargs["coefs_to_plot"]]
        name = kwargs.get("name")

        valid_names = [item.name.upper() for item in CoefficientPlottingSetting]
        ParameterValidator.assert_all_in_valid_list(requested, valid_names,
                                                    location, "coefs_to_plot")

        # Both lists default to empty and are read from kwargs only when the
        # matching setting was requested.
        cutoff = []
        if CoefficientPlottingSetting.CUTOFF.name in requested:
            cutoff = kwargs["cutoff"]
            ParameterValidator.assert_type_and_value(cutoff, list, location,
                                                     "cutoff")
            ParameterValidator.assert_all_type_and_value(cutoff, Number,
                                                         location, "cutoff",
                                                         min_inclusive=1e-15)

        n_largest = []
        if CoefficientPlottingSetting.N_LARGEST.name in requested:
            n_largest = kwargs["n_largest"]
            ParameterValidator.assert_type_and_value(n_largest, list, location,
                                                     "n_largest")
            ParameterValidator.assert_all_type_and_value(n_largest, int,
                                                         location, "n_largest",
                                                         min_inclusive=1)

        # Convert the validated names to enum members, keeping the given order.
        settings = CoefficientPlottingSettingList()
        for setting_name in requested:
            settings.append(CoefficientPlottingSetting[setting_name.upper()])

        return Coefficients(coefs_to_plot=settings,
                            cutoff=cutoff,
                            n_largest=n_largest,
                            name=name)