def copyHeader(self):
    """Assemble the dataset header text and return it.

    The header starts with an ``@relation`` line, is extended with the
    pieces returned by the ``Attributes`` accessors (input/output attribute
    headers, then input and output headers, each of the latter followed by
    a newline) and is closed with an ``@data`` line. The text accumulated
    so far is printed after every step.

    Returns:
        The complete header string.
    """
    header = ""
    print("copyHeader begin...., P is :" + header)

    header = "@relation " + Attributes.getRelationName(Attributes) + "\n"
    print(" after relation P is :" + header)

    # Each stage appends one piece and then traces the running header. The
    # pieces are produced lazily so that accessor calls and trace prints
    # stay interleaved in the same order as a straight-line version.
    stages = (
        (lambda: Attributes.getInputAttributesHeader(Attributes),
         " after getInputAttributesHeader P is :"),
        (lambda: Attributes.getOutputAttributesHeader(Attributes),
         " after getOutputAttributesHeader P is :"),
        (lambda: Attributes.getInputHeader(Attributes) + "\n",
         " after getInputHeader P is :"),
        (lambda: Attributes.getOutputHeader(Attributes) + "\n",
         " after getOutputHeader P is :"),
        (lambda: "@data\n",
         "P is :"),
    )
    for produce_piece, trace_label in stages:
        header += produce_piece()
        print(trace_label + header)

    return header
def readRegressionSet(self, datasetFile, train):
    """Load a regression dataset into this object's internal tables.

    Reads ``datasetFile`` through the instance set, records the number of
    instances, inputs and variables, copies every input value and
    missing-value flag into per-instance lists, stores the first output
    value both as returned and converted with ``int()``, and finally calls
    ``computeStatistics()``.

    The process is terminated with exit status 1 when the dataset declares
    more than one output attribute or no output attribute at all.

    Args:
        datasetFile: Dataset location, forwarded unchanged to ``readSet``
            of the instance set.
        train: Flag forwarded unchanged to ``readSet``.

    Note:
        Exceptions raised while loading are reported on stdout and are not
        re-raised; ``computeStatistics()`` still runs afterwards.
    """
    try:
        # Load in memory a dataset that contains a regression problem.
        self.__instanceSet.readSet(datasetFile, train)
        self.__nData = self.__instanceSet.getNumInstances()
        self.__nInputs = Attributes.getInputNumAttributes(Attributes)
        n_outputs = Attributes.getOutputNumAttributes(Attributes)
        self.__nVars = self.__nInputs + n_outputs

        print("In readRegressionSet , self.__nData is : " + str(self.__nData))
        print("In readRegressionSet , self.__nInputs is : " +
              str(self.__nInputs))
        print("In readRegressionSet , self.__nVars is : " + str(self.__nVars))

        # Check that there is exactly one output variable. Both failure
        # cases terminate the process, so the code below can rely on a
        # single output being present.
        if n_outputs > 1:
            print("Out put attribute: ")
            print(Attributes.getOutputAttributesHeader(Attributes))
            print("This algorithm can not process MIMO datasets")
            print("All outputs but the first one will be removed")
            # SystemExit is what exit() raises; raising it directly avoids
            # depending on the interactive helper installed by ``site``.
            raise SystemExit(1)
        if n_outputs < 1:
            print("This algorithm can not process datasets without outputs")
            print("Zero-valued output generated")
            raise SystemExit(1)

        n_data = self.__nData
        n_inputs = self.__nInputs

        # Initialise our own tables.
        self.__X = [[0.0] * n_inputs for _ in range(n_data)]
        self.__missing = [[False] * n_inputs for _ in range(n_data)]
        self.__outputInteger = [0] * n_data
        # The real-valued outputs are written by index below, so the list
        # must be sized here as well; otherwise the first store fails and
        # the handler at the bottom aborts the whole load.
        self.__outputReal = [0.0] * n_data

        # Maximum and minimum of inputs.
        self.__emax = [None] * n_inputs
        self.__emin = [None] * n_inputs
        for j in range(n_inputs):
            attribute = Attributes.getAttributeByPos(Attributes, j)
            self.__emax[j] = attribute.getMaxAttribute()
            self.__emin[j] = attribute.getMinAttribute()

        # Copy all values into the tables.
        self.__nClasses = 0
        for i in range(n_data):
            inst = self.__instanceSet.getInstance(i)
            for j in range(n_inputs):
                self.__X[i][j] = self.__instanceSet.getInputNumericValue(i, j)
                # NOTE(review): readClassificationSet calls
                # getInputMissingValuesWithPos(j) at this point - confirm
                # which accessor the instance class actually provides.
                self.__missing[i][j] = inst.getInputMissingValues(j)
                if self.__missing[i][j]:
                    # Missing inputs are stored as (minimum - 1).
                    self.__X[i][j] = self.__emin[j] - 1
            self.__outputReal[i] = self.__instanceSet.getOutputNumericValue(
                i, 0)
            self.__outputInteger[i] = int(self.__outputReal[i])
    except OSError as error:
        print("OS error: {0}".format(error))
    except Exception as otherException:
        print("DBG: Exception in readSet:", sys.exc_info()[0])
        print(" In readRegressionSet other Exception is :" +
              str(otherException))
    self.computeStatistics()
def readClassificationSet(self, datasetFile, train):
    """Load a classification dataset into this object's internal tables.

    Reads ``datasetFile`` through the instance set (when one is set),
    records the number of instances, inputs and variables, copies every
    input value, missing-value flag, class label (``y_class``) and the
    first output (numeric and nominal form) into per-instance lists, and
    derives the number of classes as the largest numeric output plus one.
    Afterwards ``computeStatistics()`` and ``computeInstancesPerClass()``
    are called. Progress is traced on stdout throughout.

    The process is terminated with exit status 1 when the dataset declares
    more than one output attribute or no output attribute at all.

    Args:
        datasetFile: Dataset location, forwarded unchanged to ``readSet``
            of the instance set.
        train: Flag forwarded unchanged to ``readSet``.

    Note:
        Exceptions raised while loading are reported on stdout and are not
        re-raised; the two ``compute*`` calls still run afterwards.
    """
    try:
        # Load in memory a dataset that contains a classification problem.
        print("Inside readClassificationSet, datasetFile :" +
              str(datasetFile))
        print("train is :" + str(train))
        print("object instanceSet is :" + str(self.__instanceSet))
        if self.__instanceSet is None:
            print("self.__instanceSet is Null")
        else:
            print("self.__instanceSet is not None, train = " + str(train))
            self.__instanceSet.readSet(datasetFile, train)

        print("begin getNumInstances ...... in readClassificationSet ")
        self.__nData = self.__instanceSet.getNumInstances()
        print("In readClassificationSet , self.__nData is : " +
              str(self.__nData))
        self.__nInputs = Attributes.getInputNumAttributes(Attributes)
        print("In readClassificationSet , self.__nInputs is : " +
              str(self.__nInputs))
        n_outputs = Attributes.getOutputNumAttributes(Attributes)
        self.__nVars = self.__nInputs + n_outputs
        print("In readClassificationSet , self.__nVars is : " +
              str(self.__nVars))

        # Check that there is exactly one output variable. Both failure
        # cases terminate the process, so the code below can rely on a
        # single output being present.
        if n_outputs > 1:
            out_attrs = Attributes.getOutputAttributes(Attributes)
            print("Output Attributes number is bigger than 1")
            # Number the attributes 1, 2, 3, ... in the diagnostic output.
            for position, out_att in enumerate(out_attrs, start=1):
                print("Att" + str(position) + str(out_att.getName()))
            print("" + Attributes.getOutputAttributesHeader(Attributes))
            print("This algorithm can not process MIMO datasets")
            print("All outputs but the first one will be removed")
            # SystemExit is what exit() raises; raising it directly avoids
            # depending on the interactive helper installed by ``site``.
            raise SystemExit(1)
        if n_outputs < 1:
            print("This algorithm can not process datasets without outputs")
            print("Zero-valued output generated")
            raise SystemExit(1)

        print("define all the array in MyDataSet class......")
        # Initialise our own tables.
        print("The two dimension array X, dimension 1 is :" +
              str(self.__nData) + " ,Dimension 2 is :" + str(self.__nInputs))
        n_data = self.__nData
        n_inputs = self.__nInputs
        print("nDataLength = " + str(n_data))
        print("nInputLength = " + str(n_inputs))

        # One row per instance, one column per input.
        self.__X = [[None] * n_inputs for _ in range(n_data)]
        self.__y = [None] * n_data
        self.__missing = [[None] * n_inputs for _ in range(n_data)]
        self.__outputInteger = [None] * n_data
        self.__outputReal = [None] * n_data
        self.__output = [""] * n_data

        # Maximum and minimum of inputs.
        self.emax = [0.0] * n_inputs
        self.emin = [0.0] * n_inputs
        for n in range(n_inputs):
            attribute = Attributes.getAttributeByPos(Attributes, n)
            self.emax[n] = attribute.getMaxAttribute()
            self.emin[n] = attribute.getMinAttribute()
            print("self.emax[n]:" + str(self.emax[n]))
            print("self.emin[n]:" + str(self.emin[n]))

        # Copy all values into the tables.
        self.__nClasses = 0
        for i in range(n_data):
            inst = self.__instanceSet.getInstance(i)
            # Add the class label of the instance to the y list.
            self.__y[i] = inst.y_class
            for j in range(n_inputs):
                input_numeric_value = self.__instanceSet.getInputNumericValue(
                    i, j)
                print("self.__X [i] = " + str(i) + ",[j] = " + str(j) +
                      ",input_Numeric_Value:" + str(input_numeric_value))
                self.__X[i][j] = input_numeric_value
                print("after get self.__X[i][j]")
                self.__missing[i][j] = inst.getInputMissingValuesWithPos(j)
                print("after self.__missing[i][j]")
                if self.__missing[i][j]:
                    # Missing inputs are stored as (minimum - 1).
                    self.__X[i][j] = self.emin[j] - 1

            # Reaching this point implies exactly one output attribute
            # (the checks above exit otherwise); the trace line is kept.
            print("noOutputs==False")
            # NOTE(review): readRegressionSet converts the output with
            # int(); here the value is stored as returned - confirm that
            # getOutputNumericValue yields an integer for nominal outputs.
            self.__outputInteger[i] = self.__instanceSet.getOutputNumericValue(
                i, 0)
            print("self.__outputInteger[" + str(i) + "] = " +
                  str(self.__outputInteger[i]))
            self.__output[i] = self.__instanceSet.getOutputNominalValue(i, 0)
            if self.__outputInteger[i] > self.__nClasses:
                self.__nClasses = self.__outputInteger[i]

        # The number of classes is the largest numeric output plus one.
        self.__nClasses = self.__nClasses + 1
        print('Number of classes=' + str(self.__nClasses))
    except Exception as error:
        print(
            "readClassificationSet: Exception in readSet, in readClassificationSet:"
            + str(error))
    self.computeStatistics()
    self.computeInstancesPerClass()