def set_mydataset_instanceset(self, my_dataset, x_array, output, isTrain):
    """Build my_dataset.instanceSet from feature rows and output labels.

    Each row of x_array is serialized as a comma-separated line (None
    features are skipped, every kept feature is followed by ','), the
    matching output[i] label is appended, and the line is parsed into an
    Instance that is added to the new instance set.

    :param my_dataset: dataset object whose instanceSet is (re)built
    :param x_array: iterable of feature rows
    :param output: sequence of output labels, one per row (string-like)
    :param isTrain: forwarded to Instance.setThreeParameters
    """
    my_dataset.instanceSet = InstanceSet()
    my_dataset.instanceSet.instanceSet = []
    for i, row in enumerate(x_array):
        # join non-None features; each feature keeps its trailing ',' so
        # the output label can be concatenated directly (same line layout
        # as the original += loop, built in one pass)
        line = ''.join(str(j) + ',' for j in row if j is not None) + output[i]
        newInstance = Instance()
        newInstance.setThreeParameters(
            line, isTrain, len(my_dataset.instanceSet.instanceSet))
        my_dataset.instanceSet.instanceSet.append(newInstance)
def addInstance(self, inst):
    """Append inst to self.instanceSet, creating the list if needed.

    Fixes two defects in the original grow-a-new-vector code:
    1. when self.instanceSet was None it evaluated `Instance[1]`, which
       raises TypeError (subscripting a class), so adding the first
       instance always crashed;
    2. for a non-empty set the loop index ended at len-1, so
       `nVector[i] = inst` overwrote the LAST existing element and left a
       blank placeholder Instance() in the new final slot.

    :param inst: the instance to append
    """
    if self.instanceSet is None:
        self.instanceSet = [inst]
    else:
        # copy-then-append preserves the original's "replace the whole
        # vector" behavior (self.instanceSet is rebound, not mutated)
        self.instanceSet = list(self.instanceSet) + [inst]
def removeInstance(self, instNum):
    """Remove the instance at index instNum from self.instanceSet.

    Out-of-range indices (negative or >= len) are silently ignored,
    matching the original early-return behavior.

    :param instNum: zero-based index of the instance to drop
    """
    if instNum < 0 or instNum >= len(self.instanceSet):
        return
    # Rebuild the list without the removed element. The original filled
    # an array of throwaway Instance() placeholders and copied into it;
    # slicing produces the same final list without the extra allocations.
    self.instanceSet = self.instanceSet[:instNum] + self.instanceSet[instNum + 1:]
def read_set_from_data_row_array(self, data_raw_array, isTrain):
    """Parse a header and the stored raw data rows into self.instanceSet.

    Mirrors readSet() but consumes pre-split data rows (self.data_rows)
    instead of reading a file. Format errors are collected in a
    FormatErrorKeeper and printed; any unexpected exception is caught
    and reported rather than propagated.

    :param data_raw_array: raw rows used to build the header parser
                           (the data loop itself reads self.data_rows)
    :param isTrain: True when loading the training partition; enables
                    the Attributes statistics init/finish calls
    """
    try:
        errorLogger = FormatErrorKeeper()
        # NOTE(review): original TODO — InstanceParser construction can
        # fail with 'can only concatenate str (not "WindowsPath") to str'
        instance_parser = InstanceParser.init_for_granularity_parser(
            data_raw_array, isTrain)
        # Read header information: @relation, @attribute, @inputs, @outputs
        self.parse_header_from_data_row_array(instance_parser, isTrain)
        # Attribute statistics are initialized only for a training set
        # with exactly one output attribute.
        if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
            Attributes.initStatistics(Attributes)
        # Temporary vector for the instances read.
        tempSet = []
        # The parameter is superseded by the rows stored on the object
        # (same behavior as the original).
        data_raw_array = self.data_rows
        for i in range(len(data_raw_array)):
            # BUG FIX: the original guarded this body with
            # `if len(new_data_rows) != 0:` where new_data_rows was
            # always an empty list, so no instance was ever created.
            newInstance = Instance()
            newInstance.set_three_parameters_for_granularity_rules(
                data_raw_array[i], isTrain, len(tempSet))
            tempSet.append(newInstance)
        # Materialize the temporary vector as the instance set.
        self.instanceSet = list(tempSet)
        # BUG FIX: check the same local errorLogger we report from
        # (the original tested self.errorLogger, which may not exist).
        if errorLogger.getNumErrors() > 0:
            for k in range(errorLogger.getNumErrors()):
                errorLogger.getError(k).printErrorInfo()
        # For a training dataset the statistics are finished here.
        if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
            Attributes.finishStatistics(Attributes)
        # Close the underlying stream.
        instance_parser.close()
    except Exception as e:
        print("Unexpected error in readSet of InstanceSet class :" + str(e))
def readSet(self, fileName, isTrain, file_path):
    """Read a dataset file (header + data lines) into self.instanceSet.

    Opens <file_path>/dataset/<fileName>, parses the header
    (@relation/@attribute/@inputs/@outputs), builds one Instance per
    data line from self.data_lines, reports any collected format
    errors, and finishes attribute statistics in train mode.

    :param fileName: dataset file name inside the 'dataset' folder
    :param isTrain: True when loading the training partition
    :param file_path: base folder (str or pathlib.Path) containing 'dataset'
    """
    print("Before try in readSet of InstanceSet, fileName is :" + str(fileName) + ".")
    print("Opening the file in readSet of InstanceSet: " + str(fileName) + ".")
    try:
        import os  # local import for the portable path join below

        errorLogger = FormatErrorKeeper()
        self.data_folder = file_path
        # BUG FIX: build the path portably. str() also accepts
        # pathlib.Path arguments — the original '+' concatenation raised
        # TypeError for WindowsPath (see the old TODO) and hard-coded
        # Windows separators.
        self.file_to_open = os.path.join(str(self.data_folder), "dataset", fileName)
        print("In readSet,file_to_open is:" + str(self.file_to_open))
        instance_parser = InstanceParser(self.file_to_open, isTrain)
        print("In readSet finished read file " + str(self.file_to_open))
        # Read header information: @relation, @attribute, @inputs, @outputs
        self.parseHeader(instance_parser, isTrain)
        print(" The number of output attributes is: " + str(Attributes.getOutputNumAttributes(Attributes)))
        print("In readSet, isTrain is " + str(isTrain))
        # Attribute statistics are initialized only for a training set
        # with exactly one output attribute.
        if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
            print("Begin Attributes.initStatistics......")
            Attributes.initStatistics(Attributes)
        print("Reading the data")
        tempSet = []
        print("begin instance_parser.getLines()...... ")
        lines = self.data_lines
        print("********* There are : " + str(len(lines)) + "In original Data lines ********* ")
        # Keep only real data rows: drop every header/meta line.
        header_tags = ("@relation", "@attribute", "@inputs", "@outputs", "@data")
        new_data_lines = [line for line in lines
                          if not any(tag in line for tag in header_tags)]
        # (The original also tested `new_data_lines is not None` inside
        # the loop — a list is never None, so the check is dropped.)
        for line in new_data_lines:
            print("Data line: " + str(line))
            newInstance = Instance()
            newInstance.setThreeParameters(line, isTrain, len(tempSet))
            tempSet.append(newInstance)
        # Materialize the temporary vector as the instance set.
        self.instanceSet = list(tempSet)
        # BUG FIX: check the same local errorLogger we report from
        # (the original tested self.errorLogger, which may not exist).
        if errorLogger.getNumErrors() > 0:
            for k in range(errorLogger.getNumErrors()):
                errorLogger.getError(k).printErrorInfo()
        # For a training dataset the statistics are finished here.
        if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
            Attributes.finishStatistics(Attributes)
        # Close the underlying stream.
        instance_parser.close()
    except Exception as e:
        print("Unexpected error in readSet of InstanceSet class :" + str(e))