def __logLikelihoodContinuous(self, classLabel: str,
                                  instance: Instance) -> float:
        """
        Scores an instance against a class using per-attribute means and deviations.

        The score starts as the natural logarithm of the prior probability of classLabel. For every attribute index i
        of the instance, -0.5 * ((xi - mi) / si) ** 2 is added, where mi and si are the i'th entries of the mean and
        deviation vectors stored for classLabel. Attributes whose deviation equals zero contribute nothing.

        PARAMETERS
        ----------
        classLabel : str
            Class label whose prior, means and deviations are used.
        instance : Instance
            Instance whose attribute values are scored.

        RETURNS
        -------
        float
            The accumulated log likelihood of the instance for the given class label.
        """
        total = math.log(self.priorDistribution.getProbability(classLabel))
        for index in range(instance.attributeSize()):
            value = instance.getAttribute(index).getValue()
            mean = self.__classMeans[classLabel].getValue(index)
            deviation = self.__classDeviations[classLabel].getValue(index)
            if deviation == 0:
                # A zero deviation would divide by zero; such attributes are ignored.
                continue
            standardized = (value - mean) / deviation
            total += -0.5 * math.pow(standardized, 2)
        return total
Exemplo n.º 2
0
    def __checkDefinition(self, instance: Instance) -> bool:
        """
        Verifies that every attribute of the instance agrees with the attribute type recorded in the data definition
        at the same position.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose attribute types are checked.

        RETURNS
        -------
        bool
            False as soon as one attribute's class maps to a type different from the definition's type at that index,
            True otherwise. Attributes that are instances of none of the four known classes are not checked.
        """
        # Order matters: the first matching class decides, exactly like an if/elif chain.
        expectedTypes = (
            (BinaryAttribute, AttributeType.BINARY),
            (DiscreteIndexedAttribute, AttributeType.DISCRETE_INDEXED),
            (DiscreteAttribute, AttributeType.DISCRETE),
            (ContinuousAttribute, AttributeType.CONTINUOUS),
        )
        for index in range(instance.attributeSize()):
            attribute = instance.getAttribute(index)
            for attributeClass, expectedType in expectedTypes:
                if isinstance(attribute, attributeClass):
                    if self.__definition.getAttributeType(index) is not expectedType:
                        return False
                    break
        return True
    def __logLikelihoodDiscrete(self, classLabel: str,
                                instance: Instance) -> float:
        """
        Scores an instance against a class using per-attribute discrete distributions.

        The score starts as the natural logarithm of the prior probability of classLabel. For every attribute index i
        of the instance, the logarithm of the Laplace-smoothed probability of the attribute's value, taken from the
        i'th distribution stored for classLabel, is added.

        PARAMETERS
        ----------
        classLabel : str
            Class label whose prior and attribute distributions are used.
        instance : Instance
            Instance whose attribute values are scored.

        RETURNS
        -------
        float
            The accumulated log likelihood of the instance for the given class label.
        """
        total = math.log(self.priorDistribution.getProbability(classLabel))
        distributions = self.__classAttributeDistributions.get(classLabel)
        for index in range(instance.attributeSize()):
            value = instance.getAttribute(index).getValue()
            smoothed = distributions[index].getProbabilityLaplaceSmoothing(value)
            total += math.log(smoothed)
        return total
Exemplo n.º 4
0
    def predict(self, instance: Instance) -> str:
        """
        Returns the candidate class label with the largest metric for the given instance.

        For a CompositeInstance the candidates are its possible class labels and the fallback answer is the first of
        them; otherwise the candidates are all items of the prior distribution and the fallback is its maximum item.
        Candidates that the prior distribution does not contain are skipped. The search starts from negative infinity,
        so a candidate with any finite metric can win (a fixed finite floor would wrongly discard all candidates whose
        metrics are very negative).

        PARAMETERS
        ----------
        instance : Instance
            Instance to predict.

        RETURNS
        -------
        str
            The class label with the maximum metric, or the fallback label when no candidate produced a metric
            greater than negative infinity.
        """
        if isinstance(instance, CompositeInstance):
            candidates = instance.getPossibleClassLabels()
            predictedClass = candidates[0]
        else:
            candidates = [self.priorDistribution.getItem(i)
                          for i in range(len(self.priorDistribution))]
            predictedClass = self.priorDistribution.getMaxItem()
        maxMetric = float("-inf")
        for Ci in candidates:
            if self.priorDistribution.containsItem(Ci):
                metric = self.calculateMetric(instance, Ci)
                if metric > maxMetric:
                    maxMetric = metric
                    predictedClass = Ci
        return predictedClass
Exemplo n.º 5
0
 def distance(self, instance1: Instance, instance2: Instance) -> float:
     """
     Accumulates a per-attribute dissimilarity between two instances.

     For each attribute index of instance1: when both attributes are DiscreteAttribute, 1 is added if the first
     value is not None and differs from the second; otherwise, when both are ContinuousAttribute, the squared
     difference of their values is added. No square root is taken of the total.

     PARAMETERS
     ----------
     instance1 : Instance
         First instance; its attribute count drives the loop.
     instance2 : Instance
         Second instance.

     RETURNS
     -------
     float
         Sum of the per-attribute contributions.
     """
     total = 0
     for index in range(instance1.attributeSize()):
         first = instance1.getAttribute(index)
         if isinstance(first, DiscreteAttribute):
             second = instance2.getAttribute(index)
             if isinstance(second, DiscreteAttribute):
                 if first.getValue() is not None and first.getValue() != second.getValue():
                     total += 1
                 continue
         if isinstance(first, ContinuousAttribute):
             second = instance2.getAttribute(index)
             if isinstance(second, ContinuousAttribute):
                 total += math.pow(first.getValue() - second.getValue(), 2)
     return total
Exemplo n.º 6
0
 def generateInstanceFromSentence(self, sentence: Sentence, wordIndex: int) -> Instance:
     """
     Builds a classification instance for one word of a sentence, labelled with the word's named entity type.

     PARAMETERS
     ----------
     sentence : Sentence
         Input sentence.
     wordIndex : int
         The index of the word in the sentence.

     RETURNS
     -------
     Instance
         The generated instance, or None when the word is not an AnnotatedWord.
     """
     word = sentence.getWord(wordIndex)
     if not isinstance(word, AnnotatedWord):
         return None
     entityLabel = NamedEntityType.getNamedEntityString(word.getNamedEntityType())
     generated = Instance(entityLabel)
     self.addAttributes(generated, sentence, wordIndex)
     return generated
Exemplo n.º 7
0
    def calculateRMinusY(self, instance: Instance, inputVector: Vector,
                         weights: Matrix) -> Vector:
        """
        Computes the difference between the target vector of an instance and the normalized model output.

        The target vector has self.K entries, all zero except a 1.0 at the position of the instance's class label in
        self.classLabels. The output is obtained by multiplying the weights with the input vector from the right and
        passing the product through normalizeOutput.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose class label selects the non-zero target entry.
        inputVector : Vector
            Vector multiplied with the weights.
        weights : Matrix
            Matrix of weights.

        RETURNS
        -------
        Vector
            Target vector minus the normalized output.
        """
        target = Vector()
        labelPosition = self.classLabels.index(instance.getClassLabel())
        target.initAllZerosExceptOne(self.K, labelPosition, 1.0)
        rawOutput = weights.multiplyWithVectorFromRight(inputVector)
        normalized = self.normalizeOutput(rawOutput)
        return target.difference(normalized)
    def nearestNeighbors(self, instance: Instance) -> InstanceList:
        """
        Collects the stored instances closest to the given instance.

        Every stored instance is paired with its distance to the given instance in a KnnInstance. When the given
        instance is a CompositeInstance, only stored instances whose class label is among its possible class labels
        are considered. The pairs are sorted with the comparator from makeComparator and the first k (or fewer, if not
        enough candidates exist) are returned.

        PARAMETERS
        ----------
        instance : Instance
            Instance to find nearest neighbors of.

        RETURNS
        -------
        InstanceList
            At most k stored instances, in sorted order.
        """
        restrictLabels = isinstance(instance, CompositeInstance)
        allowedLabels = instance.getPossibleClassLabels() if restrictLabels else []
        candidates = []
        for position in range(self.__data.size()):
            stored = self.__data.get(position)
            if restrictLabels and stored.getClassLabel() not in allowedLabels:
                continue
            separation = self.__distanceMetric.distance(stored, instance)
            candidates.append(KnnInstance(stored, separation))
        candidates.sort(key=cmp_to_key(self.makeComparator()))
        nearest = InstanceList()
        for rank in range(min(self.__k, len(candidates))):
            nearest.add(candidates[rank].instance)
        return nearest
    def generateInstanceFromSentence(self, sentence: Sentence,
                                     wordIndex: int) -> Instance:
        """
        Builds a classification instance for one word of a sentence, labelled with the transition list of the word's
        parse. Attributes are added for each of the windowSize positions preceding the word (positions before the
        start of the sentence get the "<s>" empty-word attributes) and then for the word itself.

        PARAMETERS
        ----------
        sentence : Sentence
            Input sentence.
        wordIndex : int
            The index of the word in the sentence.

        RETURNS
        -------
        Instance
            The generated instance, or None when the word is not an AnnotatedWord.
        """
        word = sentence.getWord(wordIndex)
        if not isinstance(word, AnnotatedWord):
            return None
        generated = Instance(word.getParse().getTransitionList())
        for position in range(wordIndex - self.windowSize, wordIndex):
            if position >= 0:
                self.addAttributesForPreviousWords(generated, sentence, position)
            else:
                # The window reaches before the first word of the sentence.
                self.addAttributesForEmptyWords(generated, "<s>")
        self.addAttributesForPreviousWords(generated, sentence, wordIndex)
        return generated
Exemplo n.º 10
0
    def convertInstance(self, instance: Instance):
        """
        Replaces the attributes of an instance with its projections onto the stored eigenvectors.

        A Vector is built from the instance's continuous attributes, all attributes are removed from the instance, and
        one ContinuousAttribute per eigenvector is added holding the dot product of that Vector with the eigenvector.

        PARAMETERS
        ----------
        instance : Instance
            Instance to convert in place.
        """
        original = Vector(instance.continuousAttributes())
        instance.removeAllAttributes()
        for eigenvector in self.__eigenvectors:
            projection = original.dotProduct(eigenvector)
            instance.addAttribute(ContinuousAttribute(projection))
Exemplo n.º 11
0
    def convertInstance(self, instance: Instance):
        """
        Normalizes the continuous attributes of a single instance in place: new x_i = (x_i - m_i) / s_i, where m_i and
        s_i come from the stored average and standard deviation instances. Non-continuous attributes are left as is.

        NOTE(review): nothing here guards against s_i being zero; confirm the stored deviations are never zero.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose attributes will be normalized.
        """
        for index in range(instance.attributeSize()):
            attribute = instance.getAttribute(index)
            if not isinstance(attribute, ContinuousAttribute):
                continue
            mean = self.__averageInstance.getAttribute(index)
            deviation = self.__standardDeviationInstance.getAttribute(index)
            centered = attribute.getValue() - mean.getValue()
            attribute.setValue(centered / deviation.getValue())
Exemplo n.º 12
0
    def discreteCheck(self, instance: Instance) -> bool:
        """
        Checks that no attribute of the instance is a DiscreteAttribute that is not also a DiscreteIndexedAttribute.

        PARAMETERS
        ----------
        instance : Instance
            Instance to check.

        RETURNS
        -------
        bool
            False if some attribute is discrete but not discrete indexed, True otherwise.
        """
        attributes = (instance.getAttribute(index)
                      for index in range(instance.attributeSize()))
        return not any(
            isinstance(attribute, DiscreteAttribute)
            and not isinstance(attribute, DiscreteIndexedAttribute)
            for attribute in attributes)
    def removeDiscreteAttributesFromInstance(self, instance: Instance,
                                             size: int):
        """
        Removes from the instance every one of its first size attributes whose attribute distribution is non-empty.

        PARAMETERS
        ----------
        instance : Instance
            Instance to remove attributes from.
        size : int
            Number of leading attribute positions to examine.
        """
        position = 0
        for index in range(size):
            if len(self.attributeDistributions[index]) == 0:
                # Attribute is kept, so the next candidate sits one slot further.
                position += 1
            else:
                # After a removal the following attribute shifts into this slot.
                instance.removeAttribute(position)
    def convertInstance(self, instance: Instance):
        """
        Converts the discrete attributes of a single instance to their indexed version.

        For each original attribute with a non-empty attribute distribution, a DiscreteIndexedAttribute built from the
        attribute's string form, its index in the distribution and the distribution's size is appended. Afterwards
        the original discrete attributes are removed.

        PARAMETERS
        ----------
        instance : Instance
            The instance to be converted in place.
        """
        originalSize = instance.attributeSize()
        for position in range(originalSize):
            distribution = self.attributeDistributions[position]
            if len(distribution) > 0:
                text = instance.getAttribute(position).__str__()
                valueIndex = distribution.getIndex(text)
                indexed = DiscreteIndexedAttribute(text, valueIndex, len(distribution))
                instance.addAttribute(indexed)
        self.removeDiscreteAttributesFromInstance(instance, originalSize)
Exemplo n.º 15
0
    def createInputVector(self, instance: Instance):
        """
        Stores the vector form of the instance in self.x and inserts 1.0 at its first position.

        PARAMETERS
        ----------
        instance : Instance
            Instance to convert into the input vector.
        """
        inputVector = instance.toVector()
        self.x = inputVector
        self.x.insert(0, 1.0)
Exemplo n.º 16
0
    def distance(self, instance1: Instance, instance2: Instance) -> float:
        """
        Calculates Mahalanobis distance between two instances: (x^(1) - x^(2)) S (x^(1) - x^(2))^T, where S is the
        stored inverse covariance matrix.

        PARAMETERS
        ----------
        instance1 : Instance
            First instance.
        instance2 : Instance
            Second instance.

        RETURNS
        -------
        float
            Dot product of the difference vector, multiplied from the left with S, and the difference vector itself.
        """
        difference = instance1.toVector()
        # subtract is called for its effect on the receiver; its return value is not used.
        difference.subtract(instance2.toVector())
        weighted = self.__covarianceInverse.multiplyWithVectorFromLeft(difference)
        return weighted.dotProduct(difference)
Exemplo n.º 17
0
    def __setDefinition(self, instance: Instance):
        """
        Builds the data definition from the attribute classes of the given instance. Each attribute contributes the
        attribute type of the first matching class (binary, discrete indexed, discrete, continuous, in that order);
        attributes matching none of them contribute nothing.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose attributes determine the definition.
        """
        # Order matters: the first matching class decides, exactly like an if/elif chain.
        typeByClass = (
            (BinaryAttribute, AttributeType.BINARY),
            (DiscreteIndexedAttribute, AttributeType.DISCRETE_INDEXED),
            (DiscreteAttribute, AttributeType.DISCRETE),
            (ContinuousAttribute, AttributeType.CONTINUOUS),
        )
        collected = []
        for index in range(instance.attributeSize()):
            attribute = instance.getAttribute(index)
            for attributeClass, attributeType in typeByClass:
                if isinstance(attribute, attributeClass):
                    collected.append(attributeType)
                    break
        self.__definition = DataDefinition(collected)
    def satisfy(self, instance: Instance):
        """
        Tests whether the instance's attribute at the stored attribute index meets this condition.

        If the stored value is a DiscreteIndexedAttribute, the condition holds when the stored index is -1 or when the
        instance attribute's index equals the stored index.

        If the stored value is any other DiscreteAttribute, the condition holds when the two values are equal.

        If the stored value is a ContinuousAttribute, the instance value must be less than or equal to the stored
        value when the comparison is "<", and strictly greater than it for any other comparison.

        PARAMETERS
        ----------
        instance : Instance
            Instance to compare.

        RETURNS
        -------
        bool
            True if the given instance satisfies the condition; False also when the stored value is none of the
            attribute kinds above.
        """
        expected = self.__value
        if isinstance(expected, DiscreteIndexedAttribute):
            if expected.getIndex() == -1:
                return True
            return instance.getAttribute(self.__attributeIndex).getIndex() == expected.getIndex()
        if isinstance(expected, DiscreteAttribute):
            return instance.getAttribute(self.__attributeIndex).getValue() == expected.getValue()
        if isinstance(expected, ContinuousAttribute):
            if self.__comparison == "<":
                return instance.getAttribute(self.__attributeIndex).getValue() <= expected.getValue()
            return instance.getAttribute(self.__attributeIndex).getValue() > expected.getValue()
        return False
Exemplo n.º 19
0
    def initWithFile(self, fileName: str):
        """
        Constructor for generating a new DataSet from given File.

        Each line is split on commas; the last field is the class label (several labels separated by ';' produce a
        CompositeInstance) and the preceding fields are attribute values. The first line fixes the data definition:
        a field that parses as a float makes its column continuous, any other field makes it discrete. Later lines
        whose field count does not match the definition are skipped.

        The file is closed as soon as it has been read, and the line terminator is stripped before splitting so class
        labels do not carry a trailing newline.

        PARAMETERS
        ----------
        fileName : str
            File to generate DataSet from.

        RAISES
        ------
        ValueError
            If a line after the first holds a value that cannot be parsed as a float in a column inferred as
            continuous.
        """
        self.__instances = InstanceList()
        self.__definition = DataDefinition()
        with open(fileName, 'r', encoding='utf8') as inputFile:
            lines = inputFile.readlines()
        for lineIndex, line in enumerate(lines):
            attributes = line.rstrip("\n").split(",")
            featureCount = len(attributes) - 1
            if lineIndex == 0:
                # The first line decides the type of every column.
                for j in range(featureCount):
                    try:
                        float(attributes[j])
                    except ValueError:
                        self.__definition.addAttribute(AttributeType.DISCRETE)
                    else:
                        self.__definition.addAttribute(
                            AttributeType.CONTINUOUS)
            elif len(attributes) != self.__definition.attributeCount() + 1:
                continue
            classField = attributes[-1]
            if ";" not in classField:
                instance = Instance(classField)
            else:
                labels = classField.split(";")
                instance = CompositeInstance(labels[0], None, labels)
            for j in range(featureCount):
                attributeType = self.__definition.getAttributeType(j)
                if attributeType is AttributeType.CONTINUOUS:
                    instance.addAttribute(
                        ContinuousAttribute(float(attributes[j])))
                elif attributeType is AttributeType.DISCRETE:
                    instance.addAttribute(DiscreteAttribute(attributes[j]))
            if instance.attributeSize() == self.__definition.attributeCount():
                self.__instances.add(instance)
    def predict(self, instance: Instance) -> str:
        """
        Delegates prediction to the root node. When the root yields None and the instance is a CompositeInstance, the
        instance's possible class labels are returned instead.

        PARAMETERS
        ----------
        instance : Instance
            Instance to make prediction for.

        RETURNS
        -------
        str
            The root node's prediction; or the possible class labels of a CompositeInstance when the root returned
            None; or None when the root returned None for a non-composite instance.
        """
        outcome = self.__root.predict(instance)
        if outcome is not None:
            return outcome
        if isinstance(instance, CompositeInstance):
            return instance.getPossibleClassLabels()
        return outcome
Exemplo n.º 21
0
    def predict(self, instance: Instance) -> str:
        """
        Returns the maximum item of the stored distribution. For a CompositeInstance the choice is restricted to the
        instance's possible class labels.

        PARAMETERS
        ----------
        instance : Instance
            Instance to make prediction for.

        RETURNS
        -------
        str
            The entry of distribution which has the maximum value.
        """
        if not isinstance(instance, CompositeInstance):
            return self.distribution.getMaxItem()
        allowedLabels = instance.getPossibleClassLabels()
        return self.distribution.getMaxItemIncludeTheseOnly(allowedLabels)
Exemplo n.º 22
0
    def calculateMetric(self, instance: Instance, Ci: str) -> float:
        """
        Evaluates the linear score of an instance for class Ci: the dot product of the class's weight vector with the
        instance's vector form, plus the class's bias term.

        PARAMETERS
        ----------
        instance : Instance
            Instance input.
        Ci : str
            Class label used to look up the weights in self.w and the bias in self.w0.

        RETURNS
        -------
        float
            The dot product of given Instance and wi plus w0i.
        """
        features = instance.toVector()
        weights, bias = self.w[Ci], self.w0[Ci]
        return weights.dotProduct(features) + bias
Exemplo n.º 23
0
    def predict(self, instance: Instance) -> str:
        """
        Builds the input vector from the instance, computes the output, and picks a class label. For a
        CompositeInstance the choice is delegated to predictWithCompositeInstance with the instance's possible class
        labels; otherwise the label at the index of the maximum entry of self.y is returned.

        PARAMETERS
        ----------
        instance : Instance
            Instance to predict.

        RETURNS
        -------
        str
            The predicted class label.
        """
        self.createInputVector(instance)
        self.calculateOutput()
        if not isinstance(instance, CompositeInstance):
            return self.classLabels[self.y.maxIndex()]
        allowedLabels = instance.getPossibleClassLabels()
        return self.predictWithCompositeInstance(allowedLabels)
Exemplo n.º 24
0
    def generateInstanceFromSentence(self, sentence: Sentence, wordIndex: int) -> Instance:
        """
        Builds a classification instance for one word of a sentence, labelled with the word's shallow parse tag.

        PARAMETERS
        ----------
        sentence : Sentence
            Input sentence.
        wordIndex : int
            The index of the word in the sentence.

        RETURNS
        -------
        Instance
            The generated instance, or None when the word is not an AnnotatedWord.
        """
        word = sentence.getWord(wordIndex)
        if not isinstance(word, AnnotatedWord):
            return None
        generated = Instance(word.getShallowParse())
        self.addAttributes(generated, sentence, wordIndex)
        return generated
Exemplo n.º 25
0
    def predict(self, instance: Instance) -> str:
        """
        Predicts a class label from the nearest neighbors of the given instance. When the instance is a
        CompositeInstance and no neighbors were found, its first possible class label is returned; otherwise the
        result of Model.getMaximum over the neighbors' class labels is returned.

        PARAMETERS
        ----------
        instance : Instance
            Instance to make prediction for.

        RETURNS
        -------
        str
            The predicted class label.
        """
        neighbors = self.nearestNeighbors(instance)
        if isinstance(instance, CompositeInstance):
            if neighbors.size() == 0:
                return instance.getPossibleClassLabels()[0]
        return Model.getMaximum(neighbors.getClassLabels())
    def predict(self, instance: Instance) -> str:
        """
        Returns a uniformly random class label. For a CompositeInstance the label is drawn from its possible class
        labels, otherwise from the model's own class labels.

        The index is drawn with random.randrange, whose upper bound is exclusive, so it is always a valid position
        in the list (random.randint includes its upper bound and could select one past the end).

        PARAMETERS
        ----------
        instance : Instance
            Instance to make prediction for.

        RETURNS
        -------
        str
            The class label at the randomly selected index.

        RAISES
        ------
        ValueError
            If the list of candidate labels is empty.
        """
        if isinstance(instance, CompositeInstance):
            candidates = instance.getPossibleClassLabels()
        else:
            candidates = self.__classLabels
        index = random.randrange(len(candidates))
        return candidates[index]
Exemplo n.º 27
0
    def calculateMetric(self, instance: Instance, Ci: str) -> float:
        """
        Evaluates the quadratic score of an instance for class Ci. With xi the instance's vector form, the result is
        the sum of a quadratic term (the class matrix multiplied with xi from the left, then dotted with xi), a linear
        term (the class weight vector dotted with xi) and the class bias.

        PARAMETERS
        ----------
        instance : Instance
            Instance input.
        Ci : str
            Class label used to look up the matrix, the weight vector and the bias.

        RETURNS
        -------
        float
            The result of Wi.multiplyWithVectorFromLeft(xi).dotProduct(xi) + wi.dotProduct(xi) + w0i.
        """
        features = instance.toVector()
        quadraticWeights = self.__W[Ci]
        linearWeights = self.w[Ci]
        bias = self.w0[Ci]
        quadraticTerm = quadraticWeights.multiplyWithVectorFromLeft(features).dotProduct(features)
        linearTerm = linearWeights.dotProduct(features)
        return quadraticTerm + linearTerm + bias
    def convertInstance(self, instance: Instance):
        """
        Converts discrete attributes of a single instance to continuous version using 1-of-L encoding. An attribute
        whose distribution has L values is replaced by L continuous attributes, all 0 except a 1 at the position of
        the attribute's value in the distribution (e.g. red, green, blue become 100, 010 and 001).

        PARAMETERS
        ----------
        instance : Instance
            The instance to be converted in place.
        """
        originalSize = instance.attributeSize()
        for position in range(originalSize):
            distribution = self.attributeDistributions[position]
            valueCount = len(distribution)
            if valueCount > 0:
                hotIndex = distribution.getIndex(
                    instance.getAttribute(position).__str__())
                for slot in range(valueCount):
                    instance.addAttribute(
                        ContinuousAttribute(1 if slot == hotIndex else 0))
        self.removeDiscreteAttributesFromInstance(instance, originalSize)