示例#1
0
    def __init__(self,
                 number_of_appropriate_bios_records: int = 2000,
                 number_of_training_records: int = 2115,
                 number_of_test_records: int = 0) -> None:
        """Load the labeled bios and derive the training/test split sizes.

        Args:
            number_of_appropriate_bios_records: Number of leading rows of the
                bios file whose bio column is kept as ``appropriate_bios``.
            number_of_training_records: Number of records reserved for
                training.
            number_of_test_records: Upper bound on the number of test
                records. A falsy value (the default ``0``) means "use every
                record that is not reserved for training".

        The stored ``number_of_test_records`` is never negative: if the
        training size exceeds the total number of records (or a negative
        bound is passed), it is clamped to ``0``.
        """
        self.number_of_appropriate_bios_records = number_of_appropriate_bios_records

        # ``inappropriate_bios_path``, ``bios_path`` and ``ColNames`` are read
        # from ``self`` without being assigned in this method, so they are
        # expected to be defined elsewhere on the class.
        self.inappropriate_bios = PandasUtils.select_series(
            FileUtils.read_excel_file(self.inappropriate_bios_path),
            self.ColNames.BIO.value)
        self.number_of_inappropriate_bios_records = len(
            self.inappropriate_bios.index)
        self.number_of_all_bios_records = (
            self.number_of_appropriate_bios_records
            + self.number_of_inappropriate_bios_records)
        self.number_of_training_records = number_of_training_records

        # Records left over once the training set has been carved out.
        remaining_records = (self.number_of_all_bios_records
                             - self.number_of_training_records)
        if number_of_test_records:
            # An explicit request can only shrink the test set, never grow it
            # beyond what is left after training.
            test_records = min(remaining_records, number_of_test_records)
        else:
            test_records = remaining_records
        # A negative count is meaningless as a record count and would be
        # passed on to the split step below, so clamp it to zero.
        self.number_of_test_records = max(0, test_records)

        self.bios = FileUtils.read_excel_file(self.bios_path)
        # NOTE(review): if the bios file holds fewer rows than
        # ``number_of_appropriate_bios_records``, ``head`` presumably returns
        # fewer rows and the counts above overstate what was loaded - confirm.
        self.appropriate_bios = PandasUtils.select_series(
            self.bios.head(self.number_of_appropriate_bios_records),
            self.ColNames.BIO.value)

        # Must run after all counts and both series are set on ``self``.
        self.__generate_training_and_test_series()
        self.model = None
        self.predictions = []

        Logger.info(
            "Number of appropriate labeled bios records is : {}".format(
                self.number_of_appropriate_bios_records))
        Logger.info(
            "Number of inappropriate labeled bios records is : {}".format(
                self.number_of_inappropriate_bios_records))
        Logger.info("Number of training_records is : {}".format(
            self.number_of_training_records))
        Logger.info("Number of test records is : {}".format(
            self.number_of_test_records))