Пример #1
0
    def processInputsAndOutputs(self, isTrain, inputsDef, outputsDef,
                                outputAttrNames, inputAttrNames):
        """Complete the input/output attribute name lists after header parsing.

        ``self.outputInfered`` is always reset to ``False`` first.  Nothing else
        happens unless ``isTrain`` is exactly ``True``; in that case the list
        the header left undeclared is computed through ``Attributes`` and both
        lists are passed to ``Attributes.setOutputInputAttributes``.

        Args:
            isTrain: Only the literal ``True`` enables the processing.
            inputsDef: Compared with ``True``/``False``; tells whether the
                inputs were declared.
            outputsDef: Compared with ``True``/``False``; tells whether the
                outputs were declared.
            outputAttrNames: Output names.  When neither list was declared, the
                name of the attribute at position ``getNumAttributes() - 1`` is
                appended to this list in place.
            inputAttrNames: Input names.
        """
        print("Processing inputs and outputs")
        self.outputInfered = False  # default until something is inferred
        if isTrain is not True:
            return

        # NOTE(review): the first two calls below pass ``self`` as receiver
        # while the remaining ones pass the ``Attributes`` class -- confirm
        # which one is intended.
        if inputsDef == False:
            if outputsDef == False:
                # Nothing declared: one attribute becomes the output and every
                # other attribute becomes an input.
                lastPos = Attributes.getNumAttributes(self) - 1
                outputAttrNames.append(
                    Attributes.getAttributeByPos(self, lastPos).getName())
                inputAttrNames = Attributes.getAttributesExcept(
                    Attributes, outputAttrNames)
                self.outputInfered = True
            elif outputsDef == True:
                # Only outputs declared: inputs are the remaining attributes.
                inputAttrNames = Attributes.getAttributesExcept(
                    Attributes, outputAttrNames)
        elif inputsDef == True and outputsDef == False:
            # Only inputs declared: outputs are the remaining attributes.
            outputAttrNames = Attributes.getAttributesExcept(
                Attributes, inputAttrNames)
            self.outputInfered = True

        Attributes.setOutputInputAttributes(Attributes, inputAttrNames,
                                            outputAttrNames)
class InstanceSet:
    """Holds the instances of a dataset together with its header text.

    NOTE(review): every field below is a class-level attribute, so the mutable
    ones (``instanceSet``, ``errorLogger``, ``attributes``) are shared by all
    objects until an object rebinds them -- confirm this sharing is intended.
    """

    # /////////////////////////////////////////////////////////////////////////////
    # //////////////// ATTRIBUTES OF THE INSTANCESET CLASS ////////////////////////
    # /////////////////////////////////////////////////////////////////////////////

    # List where all the instances of the dataset are stored.

    instanceSet = []

    # String where the header of the file is stored.

    header = ""

    # String where only the attribute-definition part of the header is stored.
    attHeader = ""

    # Object that collects all the errors that happened while reading the test
    # and train datasets.
    errorLogger = FormatErrorKeeper()

    # This object contains the attribute definitions.

    attributes = InstanceAttributes()

    # It indicates whether the attributes have been stored as non-static,
    # permitting the load of different datasets.
    storeAttributesAsNonStatic = None

    # It indicates that the output attribute has been inferred as the last one.

    outputInfered = None

    # /////////////////////////////////////////////////////////////////////////////
    # ///////////////// METHODS OF THE INSTANCESET CLASS //////////////////////////
    # /////////////////////////////////////////////////////////////////////////////

    # Location of the dataset file; file_to_open is assigned by read_set and
    # data_lines by parseHeader.
    # Example of a former hard-coded folder:
    # data_folder = PureWindowsPath('C:/phd_experiments/threeAlgorithmsComparizasion/threeAlgorithmsComparizasion/ecoli')
    data_folder = None
    file_to_open = None
    data_lines = None

    # Added by rui: rows obtained from the parser for the granularity rules,
    # and the Attributes object created in __init__.
    data_rows = None
    attributes_insance = None

    def __init__(self):
        """Create an empty set.

        Clears ``attributes``, marks the attribute definitions as not stored
        non-statically and binds a new ``Attributes`` object to
        ``attributes_insance``.
        """
        self.attributes = None
        self.storeAttributesAsNonStatic = False
        self.attributes_insance = Attributes()

    def InstanceSetWithNonSAtrr(self, nonStaticAttributes):
        self.storeAttributesAsNonStatic = nonStaticAttributes
        # if ( storeAttributesAsNonStatic ) Attributes.clearAll();
        self.attributes = None

    def InstanceSetWithIns(self, ins):
        self.instanceSet = ins.instanSet.copy()

        self.header = str(ins.header)
        self.attHeader = str(ins.attHeader)
        self.attributes = str(ins.attributes)
        self.storeAttributesAsNonStatic = ins.storeAttributesAsNonStatic

    # end InstanceSet

    # * InstanceSet
    # *
    # * This constructor lets the caller define whether the attribute
    # * definitions need to be stored as non-static (nonStaticAttributes = true).
    # * Otherwise, if nonStaticAttributes = false, using this constructor is
    # * equivalent to using the default constructor.

    # * Creates a new InstanceSet with the header and instances from the passed object.
    # * It performs a deep (newly allocated) copy.
    # * @param is Original InstanceSet

    # * setAttributesAsNonStatic
    # *
    # * It stores the attributes statically defined in the class Attributes as
    # * non-static in the object's attributes. Afterwards it does not remove the
    # * static definition of the Attributes; this makes it possible to call
    # * this function for different datasets from the same problem, such as a
    # * train dataset and the corresponding test dataset.
    # */

    def setAttributesAsNonStatic(self):
        """Switch this set to its own ``InstanceAttributes`` object.

        A new ``InstanceAttributes`` is bound to ``self.attributes``, its
        ``copyStaticAttributes`` method is called, and
        ``storeAttributesAsNonStatic`` is set to ``True``.
        """
        own_definitions = InstanceAttributes()
        self.attributes = own_definitions
        own_definitions.copyStaticAttributes()

        self.storeAttributesAsNonStatic = True

    # end setAttributesAsNonStatic

    # /**
    #  * getAttributeDefinitions
    #  *
    #  * It returns the definition of the attributes contained in the dataset.
    #  *
    #  * @return InstanceAttributes containing the attributes' definitions.

    def getAttributeDefinitions(self):
        return self.attributes

    # end InstanceAttributes

    # * This method reads all the information in a DB and loads it into memory.
    # * @param fileName is the database file name.
    # * @param isTrain is a flag that indicates whether the database is for training or for testing.
    # * @throws DatasetException if there is any semantic error in the input file.
    # * @throws HeaderFormatException if there is any lexical or syntactic error in the
    # * header of the input file

    def read_set(self, fileName, isTrain, file_path):
        """Read a dataset file and load its instances into ``instanceSet``.

        The file is located at ``Path.cwd() / file_path / fileName``.  Its
        header is handled by ``parseHeader``; every remaining line that does
        not contain a header keyword becomes one ``Instance``.

        Any exception is caught and reported with ``print`` (best effort); in
        that case ``instanceSet`` keeps its previous value.  The parser is
        closed whether or not the read succeeded.

        Args:
            fileName: Name of the dataset file.
            isTrain: Truthy for a train set.  Attribute statistics are
                initialised and finished only for a train set with exactly one
                output attribute.
            file_path: Folder of the file, relative to the working directory.
        """
        print("Before try in readSet of InstanceSet, file_path is :" +
              str(file_path) + ".")
        print("Opening the file in readSet of InstanceSet: " + str(fileName) +
              ".")
        instance_parser = None
        try:
            # Start this read with an empty shared logger.  The original bound
            # the fresh logger to a local name, so the errors recorded on
            # InstanceSet.errorLogger were never the ones printed below.
            InstanceSet.errorLogger = FormatErrorKeeper()
            self.file_to_open = Path.cwd() / file_path / fileName

            print("In readSet,file_to_open is:" + str(self.file_to_open))
            instance_parser = InstanceParser(self.file_to_open, isTrain)
            # Reads @relation, @attribute, @inputs and @outputs, and stores
            # the parser's lines in self.data_lines.
            self.parseHeader(instance_parser, isTrain)
            print("In readSet, isTrain is " + str(isTrain))
            if isTrain and self.attributes_insance.getOutputNumAttributes(
            ) == 1:
                print("Begin Attributes.initStatistics......")
                self.attributes_insance.initStatistics()

            print("Reading the data")
            print("begin instance_parser.getLines()...... ")
            print("*********  There are : " + str(len(self.data_lines)) +
                  "In original Data lines ********* ")

            # Keep only the lines that carry no header keyword.
            header_tags = ("@relation", "@attribute", "@inputs", "@outputs",
                           "@data")
            new_data_lines = []
            for line in self.data_lines:
                print(" The line is :" + line)
                if not any(tag in line for tag in header_tags):
                    new_data_lines.append(line)
            print("*********  There are : " + str(len(new_data_lines)) +
                  " In new Data lines ********* ")

            # Build the new list first so that a failure half-way through
            # leaves self.instanceSet untouched.
            tempSet = []
            for line in new_data_lines:
                newInstance = Instance()
                newInstance.setThreeParameters(line, isTrain, len(tempSet))
                tempSet.append(newInstance)

            print(" Number of instances read: " + str(len(tempSet)))
            self.instanceSet = tempSet
            print("After converting all instances")

            # Report every error recorded while reading.
            logger = InstanceSet.errorLogger
            for k in range(logger.getNumErrors()):
                logger.getError(k).printErrorInfo()

            # Statistics are finished on the same object they were started on.
            if isTrain and self.attributes_insance.getOutputNumAttributes(
            ) == 1:
                self.attributes_insance.finishStatistics()
        except Exception as e:
            print("Unexpected error in readSet of InstanceSet class :" +
                  str(e))
        finally:
            # Close the stream even when reading failed.
            if instance_parser is not None:
                try:
                    instance_parser.close()
                except Exception as e:
                    print("Unexpected error in readSet of InstanceSet class :"
                          + str(e))
        # end of read_set.

        # * parseHeader (defined further below):
        # * It reads the information in the header of the file: the relation's
        # * name, the attributes' names, and the inputs and outputs.
        # *
        # * @param parser is the parser of the data set
        # * @param isTrain is a boolean indicating if this is a train set (and so
        # * parameter information must be read) or a test set (parameter
        # * information does not have to be read).

        # Reads the set from a data row array (used for granularity rules).
    def read_set_from_data_row_array(self, data_raw_array, isTrain):
        """Build ``instanceSet`` from an in-memory array of data rows.

        A parser is created with
        ``InstanceParser.init_for_granularity_parser``;
        ``parse_header_from_data_row_array`` stores the parser's rows in
        ``self.data_rows`` and one ``Instance`` is created per stored row.

        Any exception is caught and reported with ``print`` (best effort); in
        that case ``instanceSet`` keeps its previous value.  The parser is
        closed whether or not the read succeeded.

        Args:
            data_raw_array: Rows handed to the parser.
            isTrain: Truthy for a train set.  Attribute statistics are
                initialised and finished only for a train set with exactly one
                output attribute.
        """
        instance_parser = None
        try:
            # Start this read with an empty shared logger.  The original bound
            # the fresh logger to a local name, so recorded errors were never
            # the ones printed below.
            InstanceSet.errorLogger = FormatErrorKeeper()

            instance_parser = InstanceParser.init_for_granularity_parser(
                data_raw_array, isTrain)
            self.parse_header_from_data_row_array(instance_parser, isTrain)
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                Attributes.initStatistics(Attributes)

            # Fixed: the original guarded instance creation with a test on a
            # list that was never filled, so no row was ever converted.
            rows = self.data_rows
            tempSet = []
            for i in range(len(rows)):
                newInstance = Instance()
                newInstance.set_three_parameters_for_granularity_rules(
                    rows[i], isTrain, len(tempSet))
                tempSet.append(newInstance)

            # Assign only after every row was converted successfully.
            self.instanceSet = tempSet

            # Report every error recorded while reading.
            logger = InstanceSet.errorLogger
            for k in range(logger.getNumErrors()):
                logger.getError(k).printErrorInfo()

            # If being on a train dataset, the statistics are finished.
            if isTrain and Attributes.getOutputNumAttributes(Attributes) == 1:
                Attributes.finishStatistics(Attributes)
        except Exception as e:
            print("Unexpected error in read_set_from_data_row_array of "
                  "InstanceSet class :" + str(e))
        finally:
            # Close the stream even when reading failed.
            if instance_parser is not None:
                try:
                    instance_parser.close()
                except Exception as e:
                    print("Unexpected error in read_set_from_data_row_array "
                          "of InstanceSet class :" + str(e))
        # end of InstanceSet constructor.

    def parseHeader(self, parser, isTrain):
        # 1. Declaration of variables
        inputAttrNames = []
        outputAttrNames = []
        inputsDef = False
        outputsDef = False
        self.header = ""
        attCount = 0
        lineCount = 0
        self.attHeader = None

        print(
            "Begin to call the InstanceParser.getLines(),parser.getLines(), in InstanceSet."
        )
        lines = parser.getLines()
        self.data_lines = lines

        print(
            "************************Before for line in lines *************************"
        )
        for line in lines:
            line = str(line).strip()
            print("In parseHeader method of InstanceSet, the line is:" + line)
            if line == "@data".lower():

                break
            else:
                # print("  Line read: " + line + ".")
                lineCount = lineCount + 1
                if "@relation" in line:

                    if isTrain:
                        relationName = str(line.replace("@relation",
                                                        "")).strip()
                        # print("set Relation name :" + str(relationName))
                        self.attributes_insance.setRelationName(relationName)
                elif "@attribute" in line:

                    if isTrain:
                        # print("Begin insertAttribute ......")
                        self.insertAttribute(line)
                        attCount = attCount + 1

                elif "@inputs" in line:

                    # print("@inputs in " + str(line))
                    self.attHeader = self.header
                    inputsDef = True

                    aux = line[8:]

                    if isTrain:
                        # print("Has @inputs, aux is :" + aux)
                        self.insertInputOutput(aux, lineCount, inputAttrNames,
                                               "inputs", isTrain)
                elif "@outputs" in line:

                    if self.attHeader is None:
                        self.attHeader = self.header
                    outputsDef = True
                    # print("Defining the output in line :" + line)
                    sub_line = line.split()  # To get the output attribute name
                    aux = sub_line[1]
                    if isTrain:
                        # print("Has @outputs, aux is :" + aux)
                        self.insertInputOutput(aux, lineCount, outputAttrNames,
                                               "outputs", isTrain)

                        # print("Size of the output is: " + str(len(outputAttrNames)))

                self.header += line + "\n"
        print(
            "************************After for line in lines.************************"
        )
        if self.attHeader is None:
            self.attHeader = self.header
        self.processInputsAndOutputs(isTrain, inputsDef, outputsDef,
                                     outputAttrNames, inputAttrNames)

    # end headerParse

    # added by rui for granularity rules
    def parse_header_from_data_row_array(self, parser, isTrain):
        # 1. Declaration of variables
        inputAttrNames = []
        outputAttrNames = []
        inputsDef = False
        outputsDef = False
        self.header = ""
        attCount = 0
        lineCount = 0
        self.attHeader = None

        # print("Begin to call the InstanceParser.getLines(),parser.getLines(), in InstanceSet.")
        self.data_rows = parser.get_rows()

    # end parse_header_from_data_row_array

    def insertAttribute(self, line):
        """Parse one ``@attribute`` line and register the attribute.

        Three shapes are handled:

        * ``@attribute name`` -- nominal attribute without declared values;
        * ``@attribute name {v1, v2}`` -- nominal attribute with fixed values;
        * ``@attribute name integer|real [min, max]`` -- numeric attribute;
          the bounds are set only when a closing ``]`` is present.

        The resulting ``Attribute`` is added to ``self.attributes_insance``.

        Args:
            line: The header line, including the ``@attribute`` keyword.

        Raises:
            IndexError: If the name, the numeric type or one of the two
                bounds is missing.
            ValueError: If a ``{`` has no closing ``}`` or a bound is not a
                number.
        """
        # Delimiter that opens the value definition, if there is one.
        # Fixed: the original left this name unbound when the line had
        # neither "{" nor "[", which raised UnboundLocalError.
        if "{" in line:
            token_str = "{"
        elif "[" in line:
            token_str = "["
        else:
            token_str = None

        # Name (and type) live before the delimiter.  Splitting on any
        # whitespace also accepts tab-separated declarations.
        if token_str is None:
            declaration = line
        else:
            declaration = line[:line.index(token_str)]
        first_part = declaration.replace("@attribute", "").split()

        at = Attribute()
        at.setName(first_part[0])
        print("att set name as first_part[0] is:" + first_part[0])

        if token_str is None:
            # Nominal attribute with no definition of values.
            at.setType(Attribute.NOMINAL)

        elif token_str == "{":
            # Nominal attribute with an explicit list of values.
            at.setType(Attribute.NOMINAL)
            at.setFixedBounds(True)

            indexL = line.index("{") + 1
            indexR = line.index("}")
            print("indexL : " + str(indexL) + "indexR : " + str(indexR))
            for nominalStr in line[indexL:indexR].split(","):
                at.addNominalValue(nominalStr.strip())

        else:
            # Integer or real attribute.
            attType = first_part[1].lower()
            if attType == "integer":
                at.setType(Attribute.INTEGER)
            if attType == "real":
                at.setType(Attribute.REAL)

            # "[" is guaranteed in this branch; find() makes the missing "]"
            # case testable (index() never returns -1).
            indexL = line.index("[")
            indexR = line.find("]", indexL)
            if indexR != -1:
                st2 = line[indexL + 1:indexR].split(",")
                minBound = float(st2[0].strip())
                maxBound = float(st2[1].strip())
                at.setBounds(minBound, maxBound)

        self.attributes_insance.addAttribute(at)
        # print("insertAttribute is finished :::: ")

    # end insertAttribute

    def insertInputOutput(self, line, lineCount, collection, type, isTrain):

        # print(" processing insertInputOutput: " + line)

        # Declaring StringTokenizer
        st = line.split(",")

        for attName in st:
            attName = str(attName.strip())
            # print("attrName: " + attName)
            attrItem = self.attributes_insance.getAttributeByName(attName)
            attributes = self.attributes_insance.getAttributes()
            # for att in attributes:
            # print("att name is :" + str(att.getName()))
            # print("numbers of items that attributes:"+str(len(attributes)))
            if attrItem is None:
                # print("Attributes.getAttribute == None")
                # If this attribute has not been declared, generate error
                er = ErrorInfo(
                    ErrorInfo.InputTestAttributeNotDefined, 0, lineCount, 0, 0,
                    isTrain,
                    ("The attribute " + attName + " defined in @" + type +
                     " in test, it has not been defined in @inputs in its train dataset. It will be ignored"
                     ))
                InstanceSet.errorLogger.setError(er)

            else:
                # for itemCollection in collection:
                # print("Item in collection is " + itemCollection)
                # print("Attributes.getAttribute != None")
                # print("   > " + str(type) + " attribute considered: " + attName)
                if attName not in collection:
                    # print("attName:" + attName + " is not in collection")
                    collection.append(attName)

    # end insertInputOutput

    def processInputsAndOutputs(self, isTrain, inputsDef, outputsDef,
                                outputAttrNames, inputAttrNames):
        # After parsing the header, the inputs and the outputs are prepared.
        print("Processing inputs and outputs")
        self.outputInfered = False  # set default value
        if isTrain:
            print("isTrain == True")
            if not inputsDef and not outputsDef:
                # print("is neither inputAtt no outputAtt")
                posHere = self.attributes_insance.getNumAttributes() - 1

                outputAttrNames.append(
                    self.attributes_insance.getAttributeByPos(
                        posHere).getName())
                inputAttrNames = self.attributes_insance.getAttributesExcept(
                    outputAttrNames)
                self.outputInfered = True
            elif not inputsDef and outputsDef:
                # print("inputsDef == False and outputsDef == True")
                inputAttrNames = self.attributes_insance.getAttributesExcept(
                    outputAttrNames)
            elif inputsDef and not outputsDef:
                # print("inputsDef == True and outputsDef == False")
                outputAttrNames = self.attributes_insance.getAttributesExcept(
                    inputAttrNames)
                self.outputInfered = True
            print("setOutputInputAttributes begin: ")
            self.attributes_insance.setOutputInputAttributes(
                inputAttrNames, outputAttrNames)

    # end of processInputsAndOutputs

    # '''
    #  * Test if the output attribute has been infered.
    #  * @return True if the output attribute has been infered. False if not.
    #  '''

    def isOutputInfered(self):
        return self.outputInfered

    # '''
    #  * It returns the number of instances.
    #  * @return an int with the number of instances.
    # '''

    def getNumInstances(self):
        if self.instanceSet is not None:
            instanceNumber = len(self.instanceSet)
            print("instanceSet is not None, instanceNumber = " +
                  str(instanceNumber))
            return instanceNumber
        else:
            print("instanceSet is  None !!!")
            return 0
        # end numInstances

    # '''
    #  * Gets the instance located at the cursor position.
    #  * @return the instance located at the cursor position.
    # '''

    def getInstance(self, whichInstance):
        if whichInstance < 0 or whichInstance >= len(self.instanceSet):
            return None
        return self.instanceSet[whichInstance]

    # end getInstance

    #  * It returns all the instances of the class.
    #  * @return Instance[] with all the instances of the class.

    def getInstances(self):
        return self.instanceSet

    # //end getInstances
    # '''
    # '''
    #  * Returns the value of an integer or a real input attribute of an instance
    #  * in the instanceSet.
    #  * @param whichInst is the position of the instance.
    #  * @param whichAttr is the position of the input attribute.
    #  * @return a String with the numeric value.
    #  * @throws ArrayIndexOutOfBoundsException If the index is out of the instance
    #  * set size.
    # '''

    def getInputNumericValue(self, whichInst, whichAttr):
        # print("InstanceSet, getInputNumericValue begin...")
        instance_number = len(self.instanceSet)
        # print("whichInst = " + str(whichInst) + ", whichAttr =" + str(whichAttr))
        # print("len(self.instanceSet) = " + str(instance_number))

        if whichInst < 0 or whichInst >= instance_number:
            raise IndexError("You are trying to access to " + whichInst +
                             " instance and there are only " +
                             str(instance_number) + ".")
        instanceHere = self.instanceSet[whichInst]
        #print("instanceHere = " + str(instanceHere))
        numericValue = 0.0
        try:
            numericValue = instanceHere.getInputRealValues(whichAttr)
        except Exception as error:
            print("getInputRealValues has exception!! : " + str(error))

        return numericValue

    # end getInputNumericValue

    # '''
    #  * Returns the value of an integer or a real output attribute of an instance
    #  * in the instanceSet.
    #  * @param whichInst is the position of the instance.
    #  * @param whichAttr is the position of the output attribute.
    #  * @return a String with the numeric value.
    #  * @throws ArrayIndexOutOfBoundsException If the index is out of the instance
    #  * set size.
    # '''

    def getOutputNumericValue(self, whichInst, whichAttr):
        if whichInst < 0 or whichInst >= len(self.instanceSet):
            print(
                self.ArrayIndexOutOfBoundsException(
                    "You are trying to access to " + whichInst +
                    "instance and there are only" + self.instanceSet.length +
                    "."))
        return self.instanceSet[whichInst].getOutputRealValues(whichAttr)
        # end getOutputNumericValue

    #
    # '''
    #  * Returns the value of a nominal input attribute of an instance in the
    #  * instanceSet.
    #  * @param whichInst is the position of the instance.
    #  * @param whichAttr is the position of the input attribute.
    #  * @return a String with the nominal value.
    #  * @throws ArrayIndexOutOfBoundsException If the index is out of the instance
    #  * set size.
    # '''

    def getInputNominalValue(self, whichInst, whichAttr):
        if whichInst < 0 or whichInst >= len(self.instanceSet):
            print(
                self.ArrayIndexOutOfBoundsException(
                    "You are trying to access to " + whichInst +
                    " instance and there are only " +
                    str(len(self.instanceSet)) + "."))
        return self.instanceSet[whichInst].getOutputNominalValues(whichAttr)
        # end getInputNominalValue

    #
    # '''
    #  * Returns the value of a nominal output attribute of an instance in the
    #  * instanceSet.
    #  * @param whichInst is the position of the instance.
    #  * @param whichAttr is the position of the output attribute.
    #  * @return a String with the nominal value.
    #  * @throws ArrayIndexOutOfBoundsException If the index is out of the instance
    #  * set size.
    # '''

    def getOutputNominalValue(self, whichInst, whichAttr):
        if whichInst < 0 or whichInst >= len(self.instanceSet):
            print("You are trying to access to " + whichInst +
                  " instance and there are only " +
                  str(len(self.instanceSet)) + ".")
        return self.instanceSet[whichInst].getOutputNominalValues(whichAttr)
        # end getOutputNumericValue

    # '''
    #  * It does remove the instance i from the instanceSet.
    #  * @param instNum is the instance removed from the instanceSet.
    # '''

    def removeInstance(self, instNum):
        if instNum < 0 or instNum >= len(self.instanceSet):
            return
        aux = [Instance() for x in range(len(self.instanceSet) - 1)]
        add = 0
        for i in range(0, len(self.instanceSet)):
            if instNum == i:
                add = 1
            else:
                aux[i - add] = self.instanceSet[i]

        # Copying the auxiliar to the instanceSet variable
        self.instanceSet = aux
        aux = None  # avoiding memory leaks (not necessary in this case)

    # end removeInstance

    # '''
    #  * It removes an attribute. To remove an attribute, the train and the
    #  * test sets have to be passed to maintain the coherence of the system.
    #  * Otherwise, only the attribute of the train set would be removed, leaving
    #  * the instances of the test set inconsistent, because they would have one
    #  * extra attribute that no longer exists.
    #  *
    #  * @param tSet is the test set.
    #  * @param inputAtt is a boolean that is true when the attribute to be
    #  * removed is an input attribute.
    #  * @param whichAtt is an integer that indicates the position of the
    #  * attribute to be deleted.
    #  * @return a boolean indicating if the attribute has been deleted
    # '''

    def removeAttribute(self, tSet, inputAtt, whichAtt):
        attToDel = None
        # Getting a reference to the attribute to del
        if inputAtt:
            if self.storeAttributesAsNonStatic and self.attributes is not None:
                attToDel = self.attributes.getInputAttribute(whichAtt)
            else:
                attToDel = Attributes.getInputAttribute(whichAtt)

        else:
            if self.storeAttributesAsNonStatic and self.attributes is not None:
                attToDel = self.attributes.getOutputAttribute(whichAtt)
            else:
                attToDel = Attributes.getOutputAttribute(whichAtt)

        if self.storeAttributesAsNonStatic and self.attributes is not None:
            print("Removing the attribute")
        if (not self.attributes.removeAttribute(inputAtt, whichAtt) or
            (tSet is not None
             and not tSet.attributes.removeAttribute(inputAtt, whichAtt))):
            return False
        else:
            if not Attributes.removeAttribute(inputAtt, whichAtt):
                return False
        for i in range(0, len(self.instanceSet)):
            if self.storeAttributesAsNonStatic and self.attributes is not None:
                self.instanceSet[i].removeAttribute(self.attributes, attToDel,
                                                    inputAtt, whichAtt)
            else:
                self.instanceSet[i].removeAttribute(attToDel, inputAtt,
                                                    whichAtt)

        if tSet is not None:
            for i in range(0, tSet.instanceSet.length):

                if self.storeAttributesAsNonStatic and self.attributes is not None:
                    tSet.instanceSet[i].removeAttribute(
                        self.attributes, attToDel, inputAtt, whichAtt)
            else:
                tSet.instanceSet[i].removeAttribute(attToDel, inputAtt,
                                                    whichAtt)
            return True

    # end removeAttribute

    # '''
    #  * It returns the header.
    #  * @return a String with the header of the file.
    # '''

    def getHeader(self):
        return self.header

    # end getHeader

    def setHeader(self, copia):
        self.header = str(copia)

    # end setHeader

    def getAttHeader(self):
        return self.attHeader

    # end getAttHeader

    def setAttHeader(self, copia):
        self.attHeader = str(copia)

    # end setAttHeader

    # '''
    #  * It does return a new header (not necessary the same header as the
    #  * input file one). It only includes the valid attributes, those ones
    #  * defined in @inputs and @outputs (or taken as that role following the
    #  * keel format specification).
    #  * @return a String with the new header
    # '''

    def getNewHeader(self):
        """Build a header containing only the valid attributes.

        The result is the ``@relation`` line, one line per input attribute,
        one line for the first output attribute, and finally the input and
        output header lines. It is not necessarily equal to the header of
        the input file.

        Returns:
            The new header as a single newline-terminated string.
        """
        # Decide once whether the per-set (non-static) attribute definitions
        # are in use; otherwise the static Attributes class is used.
        useOwn = self.storeAttributesAsNonStatic and self.attributes is not None

        # Getting the relation name and the input attributes.
        if useOwn:
            line = "@relation " + self.attributes.getRelationName() + "\n"
            # NOTE(review): Attributes is passed as an argument exactly as the
            # previous code did -- confirm against getInputAttributes.
            attrs = self.attributes.getInputAttributes(Attributes)
        else:
            line = "@relation " + Attributes.getRelationName() + "\n"
            attrs = Attributes.getInputAttributes(Attributes)

        for attr in attrs:
            line += attr.toString() + "\n"

        # Getting the output attribute (only the first one is written) and
        # then the @inputs and @outputs lines from the same source.
        # NOTE(review): toString() / str() are kept per branch as they were --
        # confirm which one the attribute class actually supports.
        if useOwn:
            attrs = self.attributes.getOutputAttributes()
            line += attrs[0].toString() + "\n"
            line += self.attributes.getInputHeader() + "\n"
            line += self.attributes.getOutputHeader() + "\n"
        else:
            attrs = Attributes.getOutputAttributes()
            line += str(attrs[0]) + "\n"
            line += Attributes.getInputHeader() + "\n"
            line += Attributes.getOutputHeader() + "\n"

        return line

    # end getNewHeader

    # '''
    #  * It returns the original header definition, but
    #  * without the @inputs and @outputs lines in it.
    # '''

    def getOriginalHeaderWithoutInOut(self):

        line = ""
        attrs = []

        # Getting the relation name and the attributes
        if self.storeAttributesAsNonStatic and self.attributes is not None:
            line = "@relation " + self.attributes.getRelationName() + "\n"
            attrs = self.attributes.getAttributes()

        else:
            line = "@relation " + Attributes.getRelationName() + "\n"
            attrs = Attributes.getAttributes()

        for i in range(0, len(attrs)):
            line = line + str(attrs[i]) + "\n"
        return line
        # end getOriginalHeaderWithoutInOut;

    # '''
    #  * It prints the dataset to the specified PrintWriter
    #  * @param out is the PrintWriter where to print
    # '''

    def printOut(self, out):
        for i in range(0, len(self.instanceSet)):
            print("> Instance " + i + ":")
        if self.storeAttributesAsNonStatic and self.attributes is not None:
            self.instanceSet[i].printOut(self.attributes, out)
        else:
            self.instanceSet[i].printOut(out)

    # end printOut
    #
    # '''
    #  * It prints the dataset to the specified PrintWriter.
    #  * The order of the attributes is the same as in the
    #  * original file
    #  * @param out is the PrintWriter where to print
    #  * @param printInOut indicates whether the @inputs line (1), the @outputs
    #  * line (2), both of them (3) or neither (0) has to be printed
    # '''

    def printAsOriginal(self, out, int):
        # Printing the header as the original one
        # print(self.header)

        if self.storeAttributesAsNonStatic and self.attributes is not None:
            if self.printInOut == 1 or self.printInOut == 3:
                print(self.attributes.getInputHeader())

        if self.printInOut == 2 or self.printInOut == 3:

            print(self.attributes.getOutputHeader())

        else:
            if self.printInOut == 1 or self.printInOut == 3:
                out.println(Attributes.getInputHeader())
            if self.printInOut == 2 or self.printInOut == 3:
                out.println(Attributes.getOutputHeader())

        print("@data")
        for i in range(0, len(self.instanceSet)):
            print()
            if self.storeAttributesAsNonStatic and self.attributes is not None:
                self.instanceSet[i].printAsOriginal(self.attributes, out)
        else:
            self.instanceSet[i].printAsOriginal(self.attributes, out)

    # end printAsOriginal

    def printInsSet(self):
        print("------------- ATTRIBUTES --------------")
        if self.storeAttributesAsNonStatic and self.attributes is not None:
            self.attributes.printAttributes()

        else:
            Attributes.printAttributes()

        print("-------------- INSTANCES --------------")
        for i in range(0, self.instanceSet.length):
            print("\n> Instance " + str(i) + ":")

            if self.storeAttributesAsNonStatic and self.attributes is not None:
                self.instanceSet[i].printInsSet(self.attributes)
        else:
            self.instanceSet[i].printInsSet()

    # end printInsSet

    # Remove all instances from this InstanceSet

    def clearInstances(self):
        self.instanceSet = None

    # '''
    #    * It adds the passed instance at the end of the present InstanceSet
    #    * @param inst the instance to be added
    # '''

    def addInstance(self, inst):
        i = 0
        nVector = []
        if self.instanceSet is not None:
            nVector = [Instance() for x in range(len(self.instanceSet) + 1)]
            for i in range(0, len(self.instanceSet)):
                nVector[i] = self.instanceSet[i]

        else:
            nVector = Instance[1]

        nVector[i] = inst
        self.instanceSet = nVector

    # '''
    #    * Clear the non-Static attributes. The static class Attributes is not modified.
    # '''

    def clearNonStaticAttributes(self):
        self.attributes = None

    # '''
    #    * Appends the given attribute to the non-static list of the current InstanceSet
    #    * @param at The Attribute to be Appended
    # '''

    def addAttribute(self, att):
        if self.attributes is None:
            self.attributes = InstanceAttributes()
        self.attributes.addAttribute(att)