def __logLikelihoodContinuous(self, classLabel: str,
                              instance: Instance) -> float:
        """
        Computes the log likelihood of an Instance for a class label when the attributes are continuous. The result
        starts as the logarithm of the prior probability of the class label. Then, for every attribute position i of
        the instance, the term 0.5 * ((xi - mi) / si) ** 2 is subtracted, where mi and si are read from the stored
        class means and class deviations of the class label. Positions whose deviation is 0 contribute nothing.

        PARAMETERS
        ----------
        classLabel : str
            String input class label.
        instance : Instance
            Instance input.

        RETURNS
        -------
        float
            The log likelihood of given class label and Instance.
        """
        total = math.log(self.priorDistribution.getProbability(classLabel))
        for position in range(instance.attributeSize()):
            value = instance.getAttribute(position).getValue()
            mean = self.__classMeans[classLabel].getValue(position)
            deviation = self.__classDeviations[classLabel].getValue(position)
            if deviation == 0:
                # Dividing by a zero deviation is impossible, so this attribute is left out of the sum.
                continue
            total -= 0.5 * math.pow((value - mean) / deviation, 2)
        return total
    def __checkDefinition(self, instance: Instance) -> bool:
        """
        Verifies that every attribute of the given instance agrees with the attribute type stored at the same
        position of the data definition. Binary, discrete indexed, discrete and continuous attributes are checked;
        an attribute that is none of these is not checked at all.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose attribute types are checked.

        RETURNS
        -------
        bool
            False as soon as one attribute disagrees with the data definition, True otherwise.
        """
        # The first matching class decides, so the order of this table is significant and must be kept.
        # NOTE(review): presumably the specialised classes derive from DiscreteAttribute - confirm.
        expectedTypes = (
            (BinaryAttribute, AttributeType.BINARY),
            (DiscreteIndexedAttribute, AttributeType.DISCRETE_INDEXED),
            (DiscreteAttribute, AttributeType.DISCRETE),
            (ContinuousAttribute, AttributeType.CONTINUOUS),
        )
        for position in range(instance.attributeSize()):
            attribute = instance.getAttribute(position)
            for attributeClass, expected in expectedTypes:
                if not isinstance(attribute, attributeClass):
                    continue
                if self.__definition.getAttributeType(position) is not expected:
                    return False
                break
        return True
    def __logLikelihoodDiscrete(self, classLabel: str,
                                instance: Instance) -> float:
        """
        Computes the log likelihood of an Instance for a class label when the attributes are discrete. The result
        starts as the logarithm of the prior probability of the class label. Then, for every attribute position i of
        the instance, the logarithm of the Laplace smoothed probability of the attribute value xi, taken from the
        i'th attribute distribution stored for the class label, is added.

        PARAMETERS
        ----------
        classLabel : str
            String input class label.
        instance : Instance
            Instance input.

        RETURNS
        -------
        float
            The log likelihood of given class label and Instance.
        """
        total = math.log(self.priorDistribution.getProbability(classLabel))
        distributions = self.__classAttributeDistributions.get(classLabel)
        for position in range(instance.attributeSize()):
            value = instance.getAttribute(position).getValue()
            smoothed = distributions[position].getProbabilityLaplaceSmoothing(value)
            total += math.log(smoothed)
        return total
示例#4
0
 def distance(self, instance1: Instance, instance2: Instance) -> float:
     """
     Accumulates a distance between two instances attribute by attribute. When both attributes at a position are
     discrete, 1 is added if the value of the first one is not None and differs from the value of the second one.
     When both are continuous, the squared difference of their values is added. Any other combination adds nothing.

     PARAMETERS
     ----------
     instance1 : Instance
         First instance; its attribute size determines how many positions are compared.
     instance2 : Instance
         Second instance.

     RETURNS
     -------
     float
         The accumulated distance (no square root is taken).
     """
     total = 0
     for position in range(instance1.attributeSize()):
         first = instance1.getAttribute(position)
         if isinstance(first, DiscreteAttribute) and \
                 isinstance(instance2.getAttribute(position), DiscreteAttribute):
             firstValue = first.getValue()
             if firstValue is None:
                 continue
             if firstValue != instance2.getAttribute(position).getValue():
                 total += 1
         elif isinstance(first, ContinuousAttribute) and \
                 isinstance(instance2.getAttribute(position), ContinuousAttribute):
             difference = first.getValue() - instance2.getAttribute(position).getValue()
             total += math.pow(difference, 2)
     return total
示例#5
0
    def convertInstance(self, instance: Instance):
        """
        Normalizes the continuous attributes of a single instance in place. For all i, new x_i = (x_i - m_i) / s_i,
        where m_i and s_i are read from the stored average instance and standard deviation instance. When s_i is 0
        the attribute is only centered (new x_i = x_i - m_i) instead of raising ZeroDivisionError. Attributes that
        are not continuous are left untouched.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose attributes will be normalized.
        """
        for i in range(instance.attributeSize()):
            xi = instance.getAttribute(i)
            if not isinstance(xi, ContinuousAttribute):
                continue
            mi = self.__averageInstance.getAttribute(i).getValue()
            si = self.__standardDeviationInstance.getAttribute(i).getValue()
            centered = xi.getValue() - mi
            # A zero deviation cannot be used as a divisor; treat the scale as 1 so the value is just centered.
            xi.setValue(centered / si if si != 0 else centered)
示例#6
0
    def discreteCheck(self, instance: Instance) -> bool:
        """
        Scans the attributes of the given instance and reports whether any of them is a discrete attribute that is
        not a discrete indexed attribute.

        PARAMETERS
        ----------
        instance : Instance
            Instance to check.

        RETURNS
        -------
        bool
            False if at least one attribute is a DiscreteAttribute but not a DiscreteIndexedAttribute, True
            otherwise (including when the instance has no attributes).
        """
        attributes = (instance.getAttribute(position)
                      for position in range(instance.attributeSize()))
        return not any(
            isinstance(attribute, DiscreteAttribute)
            and not isinstance(attribute, DiscreteIndexedAttribute)
            for attribute in attributes)
    def convertInstance(self, instance: Instance):
        """
        Converts discrete attributes of a single instance to indexed version. For every original attribute position
        whose attribute distribution is not empty, a DiscreteIndexedAttribute is appended to the instance, built
        from the attribute's string form, its index in the distribution and the size of the distribution.
        Afterwards removeDiscreteAttributesFromInstance is called with the original attribute count.

        PARAMETERS
        ----------
        instance : Instance
            The instance to be converted.
        """
        originalSize = instance.attributeSize()
        for position in range(originalSize):
            distribution = self.attributeDistributions[position]
            valueCount = len(distribution)
            if valueCount == 0:
                # An empty distribution means there is nothing to index at this position.
                continue
            text = instance.getAttribute(position).__str__()
            indexedAttribute = DiscreteIndexedAttribute(
                text, distribution.getIndex(text), valueCount)
            instance.addAttribute(indexedAttribute)
        self.removeDiscreteAttributesFromInstance(instance, originalSize)
    def initWithFile(self, fileName: str):
        """
        Fills this DataSet from a comma separated text file. The last field of every line is the class label and the
        preceding fields are attribute values. The first line fixes the data definition: a field that parses as a
        float is registered as CONTINUOUS, any other field as DISCRETE. Later lines whose field count does not match
        the definition are skipped. A class label containing ';' produces a CompositeInstance built from the
        ';'-separated labels, otherwise a plain Instance is created.

        PARAMETERS
        ----------
        fileName : str
            File to generate DataSet from.

        RAISES
        ------
        ValueError
            If a field in a CONTINUOUS column of a later line is not a valid float.
        """
        self.__instances = InstanceList()
        self.__definition = DataDefinition()
        # Read inside a context manager so the file handle is closed even if reading fails.
        with open(fileName, 'r', encoding='utf8') as inputFile:
            lines = inputFile.readlines()
        for lineIndex, line in enumerate(lines):
            # Strip the line terminator; otherwise it stays inside the class label and a final line without a
            # newline would get a different label than the same class on earlier lines.
            attributes = line.rstrip("\r\n").split(",")
            attributeCount = len(attributes) - 1
            if lineIndex == 0:
                for j in range(attributeCount):
                    try:
                        float(attributes[j])
                    except ValueError:
                        self.__definition.addAttribute(AttributeType.DISCRETE)
                    else:
                        self.__definition.addAttribute(
                            AttributeType.CONTINUOUS)
            elif len(attributes) != self.__definition.attributeCount() + 1:
                # Malformed line: field count disagrees with the definition taken from the first line.
                continue
            classLabel = attributes[-1]
            if ";" not in classLabel:
                instance = Instance(classLabel)
            else:
                labels = classLabel.split(";")
                instance = CompositeInstance(labels[0], None, labels)
            for j in range(attributeCount):
                attributeType = self.__definition.getAttributeType(j)
                if attributeType is AttributeType.CONTINUOUS:
                    instance.addAttribute(
                        ContinuousAttribute(float(attributes[j])))
                elif attributeType is AttributeType.DISCRETE:
                    instance.addAttribute(DiscreteAttribute(attributes[j]))
            if instance.attributeSize() == self.__definition.attributeCount():
                self.__instances.add(instance)
    def __setDefinition(self, instance: Instance):
        """
        Builds the data definition from the attributes of the given Instance. For every attribute that is a binary,
        discrete indexed, discrete or continuous attribute, the matching attribute type is collected; attributes of
        any other kind are ignored. The collected list is then used to create a new DataDefinition.

        PARAMETERS
        ----------
        instance : Instance
            Instance input.
        """
        # The first matching class decides, so the order of this table is significant and must be kept.
        typeByClass = (
            (BinaryAttribute, AttributeType.BINARY),
            (DiscreteIndexedAttribute, AttributeType.DISCRETE_INDEXED),
            (DiscreteAttribute, AttributeType.DISCRETE),
            (ContinuousAttribute, AttributeType.CONTINUOUS),
        )
        collectedTypes = []
        for position in range(instance.attributeSize()):
            attribute = instance.getAttribute(position)
            for attributeClass, attributeType in typeByClass:
                if isinstance(attribute, attributeClass):
                    collectedTypes.append(attributeType)
                    break
        self.__definition = DataDefinition(collectedTypes)
    def convertInstance(self, instance: Instance):
        """
        Converts discrete attributes of a single instance to continuous version using 1-of-L encoding. For every
        original attribute position whose attribute distribution is not empty, one ContinuousAttribute per value of
        the distribution is appended: 1 at the index of the instance's value, 0 elsewhere. For example, with values
        red, green, blue, red becomes 1 0 0, green becomes 0 1 0 and blue becomes 0 0 1. Afterwards
        removeDiscreteAttributesFromInstance is called with the original attribute count.

        PARAMETERS
        ----------
        instance : Instance
            The instance to be converted.
        """
        originalSize = instance.attributeSize()
        for position in range(originalSize):
            distribution = self.attributeDistributions[position]
            valueCount = len(distribution)
            if valueCount == 0:
                # An empty distribution means there is nothing to encode at this position.
                continue
            hotIndex = distribution.getIndex(
                instance.getAttribute(position).__str__())
            for slot in range(valueCount):
                instance.addAttribute(
                    ContinuousAttribute(1 if slot == hotIndex else 0))
        self.removeDiscreteAttributesFromInstance(instance, originalSize)