Пример #1
0
    def set_mydataset_instanceset(self, my_dataset, x_array, output, isTrain):
        my_dataset.instanceSet = InstanceSet()
        my_dataset.instanceSet.instanceSet = []

        for i in range(0, len(x_array)):

            newInstance = Instance()
            line = ''
            for j in x_array[i]:
                if j is not None:
                    line = line + str(j) + ','
            line = line + output[i]

            newInstance.setThreeParameters(
                line, isTrain, len(my_dataset.instanceSet.instanceSet))
            my_dataset.instanceSet.instanceSet.append(newInstance)
    def addInstance(self, inst):
        i = 0
        nVector = []
        if self.instanceSet is not None:
            nVector = [Instance() for x in range(len(self.instanceSet) + 1)]
            for i in range(0, len(self.instanceSet)):
                nVector[i] = self.instanceSet[i]

        else:
            nVector = Instance[1]

        nVector[i] = inst
        self.instanceSet = nVector
    def removeInstance(self, instNum):
        if instNum < 0 or instNum >= len(self.instanceSet):
            return
        aux = [Instance() for x in range(len(self.instanceSet) - 1)]
        add = 0
        for i in range(0, len(self.instanceSet)):
            if instNum == i:
                add = 1
            else:
                aux[i - add] = self.instanceSet[i]

        # Copying the auxiliar to the instanceSet variable
        self.instanceSet = aux
        aux = None  # avoiding memory leaks (not necessary in this case)
    def read_set_from_data_row_array(self, data_raw_array, isTrain):
        # print("Before try in read_set_from_data_row_array of InstanceSet")
        try:
            # Parsing the header of the DB.
            errorLogger = FormatErrorKeeper()
            # Declaring an instance parser

            # to do The exception in init InstanceParserof InstanceParse is: can only concatenate str (not "WindowsPath") to str
            instance_parser = InstanceParser.init_for_granularity_parser(data_raw_array, isTrain)
            # Reading information in the header, i.e., @relation, @attribute, @inputs and @outputs
            # print("data_raw_array size" + str(len(data_raw_array)))
            self.parse_header_from_data_row_array(instance_parser, isTrain)
            # print(" The number of output attributes is: " + str(Attributes.getOutputNumAttributes(Attributes)))
            # The attributes statistics are init if we are in train mode.
            # print("In readSet, isTrain is " + str(isTrain))
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                # print("Begin Attributes.initStatistics......")
                Attributes.initStatistics(Attributes)
            # A temporal vector is used to store the instances read.

            # print("Reading the data in read_set_from_data_row_array")
            tempSet = []
            # print("begin instance_parser.getLines()...... ")
            data_raw_array = self.data_rows
            new_data_rows = []
            number_of_rows= len(data_raw_array)
            # print("*********  There are : " + str(number_of_rows) + "In original Data rows ********* ")

            # print("*********  There are : " + str(len(new_data_rows)) + " In new Data rows ********* ")
            for i in range(0, number_of_rows):
                if len(new_data_rows) != 0:
                    # print("Data row: " + str(data_raw_array[i]))
                    newInstance = Instance()
                    # print("how many data already in the instanceSet: " + str(len(tempSet)))
                    newInstance.set_three_parameters_for_granularity_rules(data_raw_array[i], isTrain, len(tempSet))
                    tempSet.append(newInstance)

                # The vector of instances is converted to an array of instances.
            sizeInstance = len(tempSet)
            # print(" Number of instances read: " + str(sizeInstance))
            self.instanceSet = []

            for i in range(0, sizeInstance):
                self.instanceSet.append(tempSet[i])
            # print("After converting all instances")
            # System.out.println("The error logger has any error: "+errorLogger.getNumErrors());
            if self.errorLogger.getNumErrors() > 0:
                errorNumber = len(errorLogger.getAllErrors())
                # print("There has been " + str(errorNumber) + "errors in the Dataset format.")
                for k in range(0, errorLogger.getNumErrors()):
                    errorLogger.getError(k).printErrorInfo()

            # print("There has been " + errorLogger.getAllErrors().size() + " errors in the Dataset format",
            #           errorLogger.getAllErrors());
            # print("Finishing the statistics: (isTrain)" + str(isTrain) + ", (# out attributes)" + str(Attributes.getOutputNumAttributes(Attributes)))
            # # If being on a train dataset, the statistics are finished
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                Attributes.finishStatistics(Attributes)
            # # close the stream
            instance_parser.close()
            # print("File LOADED CORRECTLY!!")
        except Exception as e:
            print("Unexpected error in readSet of InstanceSet class :" + str(e))
    def readSet(self, fileName, isTrain,file_path):
        print("Before try in readSet of InstanceSet, fileName is :" + str(fileName) + ".")
        print("Opening the file in readSet of InstanceSet: " + str(fileName) + ".")
        try:
            # Parsing the header of the DB.
            errorLogger = FormatErrorKeeper()
            self.data_folder = file_path
            self.file_to_open = self.data_folder + "\\dataset\\" + fileName
            # Declaring an instance parser
            print("In readSet,file_to_open is:" + str(self.file_to_open))
            # to do The exception in init InstanceParserof InstanceParse is: can only concatenate str (not "WindowsPath") to str
            instance_parser = InstanceParser(self.file_to_open, isTrain)
            # Reading information in the header, i.e., @relation, @attribute, @inputs and @outputs
            print("In readSet finished read file " + str(self.file_to_open))
            self.parseHeader(instance_parser, isTrain)
            print(" The number of output attributes is: " + str(Attributes.getOutputNumAttributes(Attributes)))
            # The attributes statistics are init if we are in train mode.
            print("In readSet, isTrain is " + str(isTrain))
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                print("Begin Attributes.initStatistics......")
                Attributes.initStatistics(Attributes)
            # A temporal vector is used to store the instances read.

            print("Reading the data")
            tempSet = []
            print("begin instance_parser.getLines()...... ")
            lines = self.data_lines
            new_data_lines = []
            print("*********  There are : " + str(len(lines)) + "In original Data lines ********* ")
            for line in lines:
                if ("@relation" not in line) and ("@attribute" not in line) and ("@inputs" not in line) and (
                        "@outputs" not in line) and ("@data" not in line):
                    new_data_lines.append(line)
            # print("*********  There are : " + str(len(new_data_lines)) + " In new Data lines ********* ")
            for line in new_data_lines:
                if new_data_lines is not None:
                    print("Data line: " + str(line))
                    newInstance = Instance()
                    # print("how many data already in the instanceSet: " + str(len(tempSet)))
                    newInstance.setThreeParameters(line, isTrain, len(tempSet))
                    tempSet.append(newInstance)

                # The vector of instances is converted to an array of instances.
            sizeInstance = len(tempSet)
            # print(" Number of instances read: " + str(sizeInstance))
            self.instanceSet = []

            for i in range(0, sizeInstance):
                self.instanceSet.append(tempSet[i])
            # print("After converting all instances")
            # System.out.println("The error logger has any error: "+errorLogger.getNumErrors());
            if self.errorLogger.getNumErrors() > 0:
                errorNumber = len(errorLogger.getAllErrors())
                # print("There has been " + str(errorNumber) + "errors in the Dataset format.")
                for k in range(0, errorLogger.getNumErrors()):
                    errorLogger.getError(k).printErrorInfo()

            # print("There has been " + errorLogger.getAllErrors().size() + " errors in the Dataset format",
            #           errorLogger.getAllErrors());
            # print("Finishing the statistics: (isTrain)" + str(isTrain) + ", (# out attributes)" + str(Attributes.getOutputNumAttributes(Attributes)))
            # # If being on a train dataset, the statistics are finished
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                Attributes.finishStatistics(Attributes)
            # # close the stream
            instance_parser.close()
            # print("File LOADED CORRECTLY!!")
        except Exception as e:
            print("Unexpected error in readSet of InstanceSet class :" + str(e))