Пример #1
0
def target_spearman_correlations(data, vars=None, target_var=None):
    """Return Spearman correlations between selected variables and a target.

    :param data: data table exposing ``domain.variables``,
        ``domain.class_var`` and ``to_numpy_MA``.
    :param vars: variables to correlate with the target; defaults to every
        variable in ``data.domain.variables``.
    :param target_var: the target variable; defaults to the class variable,
        which must then be continuous.
    :returns: a list with one value per entry of ``vars`` (in the same
        order): the first element of ``statc.spearmanr``'s result for that
        variable's column against the target column.
    :raises ValueError: if ``target_var`` is not given and the class
        variable is not continuous.

    Masked (missing) entries of a column are replaced by that column's
    average before the correlation is computed.
    """
    import numpy
    import statc

    if vars is None:
        vars = list(data.domain.variables)

    if target_var is None:
        class_var = data.domain.class_var
        if not is_continuous(class_var):
            raise ValueError("A data with continuous class variable expected if 'target_var' is not explicitly declared.")
        target_var = class_var

    # Translate variables into column positions of the "Ac" array.
    domain_vars = list(data.domain.variables)
    columns = [domain_vars.index(var) for var in vars]
    target_column = domain_vars.index(target_var)
    (table,) = data.to_numpy_MA("Ac")

    means = numpy.ma.average(table, axis=0)

    def filled_column(col):
        # Plain list of the column with masked entries set to the column mean.
        return list(table[:, col].filled(means[col]))

    target_values = filled_column(target_column)
    return [statc.spearmanr(filled_column(col), target_values)[0]
            for col in columns]
Пример #2
0
def compute_attr_dist_matrix(data):
    """Build a symmetric attribute-distance matrix from Spearman correlations.

    For every pair of attributes (row index greater than column index) the
    stored value is ``(1 - rho) / 2``, where ``rho`` is the first element of
    ``statc.spearmanr``'s result for the two attribute columns. Masked
    (missing) entries are replaced by the column average beforehand.

    :param data: data table exposing ``domain.attributes`` and ``toNumpyMA``.
    :returns: a ``SymMatrix`` sized by the number of attributes, with the
        attributes attached under ``items``.
    """
    import numpy
    import statc

    attributes = data.domain.attributes
    n_attrs = len(attributes)
    dist = SymMatrix(n_attrs)

    # why not just matrix.items = attrs?
    dist.setattr(b"items", attributes)

    values = data.toNumpyMA("A")[0]
    means = numpy.ma.average(values, axis=0)

    # One plain list per attribute column, missing entries mean-filled.
    columns = []
    for col in range(n_attrs):
        columns.append(list(numpy.ma.filled(values[:, col], means[col])))

    # Only the strictly-lower triangle is assigned.
    for row, row_values in enumerate(columns):
        for col, col_values in enumerate(columns[:row]):
            rho = statc.spearmanr(row_values, col_values)[0]
            dist[row, col] = (1.0 - rho) / 2.0
    return dist
Пример #3
0
def pairwise_spearman_correlations(data, vars=None):
    """Return a symmetric matrix of pairwise Spearman correlations.

    :param data: data table exposing ``domain.variables`` and
        ``to_numpy_MA``.
    :param vars: variables to correlate; defaults to every variable in
        ``data.domain.variables``.
    :returns: an ``Orange.core.SymMatrix`` of size ``len(vars)`` in which
        element ``[i, j]`` (``i < j``) holds the first element of
        ``statc.spearmanr``'s result for ``vars[i]`` and ``vars[j]``.
        Diagonal elements are not assigned by this function.

    Masked (missing) entries of a column are replaced by that column's
    average before the correlation is computed.
    """
    import numpy
    import statc

    if vars is None:
        vars = list(data.domain.variables)

    matrix = Orange.core.SymMatrix(len(vars))

    # Translate variables into column positions of the "Ac" array.
    all_vars = list(data.domain.variables)
    indices = [all_vars.index(v) for v in vars]
    (data,) = data.to_numpy_MA("Ac")

    averages = numpy.ma.average(data, axis=0)

    # Mean-fill and convert every requested column exactly once; doing it
    # inside the pair loop would repeat the same work for each pair.
    columns = [list(data[:, var].filled(averages[var])) for var in indices]

    for i, col_i in enumerate(columns):
        for j in range(i + 1, len(columns)):
            matrix[i, j] = statc.spearmanr(col_i, columns[j])[0]

    return matrix
Пример #4
0
    else:
        if classInteractions == 3:
            for a1 in range(len(atts)):
                for a2 in range(a1):
                    matrix[a1, a2] = (1.0 - orange.PearsonCorrelation(a1, a2, inputdata, 0).r) / 2.0
        else:
            if len(inputdata) < 3:
                return None
            import numpy, statc
            m = inputdata.toNumpyMA("A")[0]
            averages = numpy.ma.average(m, axis=0)
            filleds = [list(numpy.ma.filled(m[:,i], averages[i])) for i in range(len(atts))]
            for a1, f1 in enumerate(filleds):
                for a2 in range(a1):
                    matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0
    output_dict = {}
    output_dict['dm']=matrix        
    return output_dict

def cforange_hierarchical_clustering(input_dict):
    """Return a placeholder result in which every output slot is ``None``.

    ``input_dict`` is accepted but not read.
    """
    output_keys = ('centroids', 'selected_examples', 'unselected_examples')
    return dict.fromkeys(output_keys)

class Clustering:
    """Container class exposing clustering helpers as static methods."""
    @staticmethod
    def hierarchical_clustering(linkage, distance_matrix):
        """Hierarchical clustering entry point.

        NOTE(review): only the start of this method is visible here; how
        ``linkage`` and ``distance_matrix`` are consumed and what is
        returned cannot be confirmed from the visible code.
        """
        import Orange, orange, sys
        # Table pairing a human-readable linkage name with the matching
        # orange.HierarchicalClustering linkage constant.
        linkages = [("Single linkage", orange.HierarchicalClustering.Single),
                    ("Average linkage", orange.HierarchicalClustering.Average),
                    ("Ward's linkage", orange.HierarchicalClustering.Ward),
                    ("Complete linkage", orange.HierarchicalClustering.Complete)]
Пример #5
0
    def computeMatrix(self):
        # Build a symmetric attribute-distance matrix for self.data, using the
        # measure selected by self.classInteractions:
        #   0    -> im.exportChi2Matrix()
        #   1    -> im.depExportDissimilarityMatrix(jaccard=1)
        #   2    -> im.exportDissimilarityMatrix(jaccard=1)
        #   3    -> (1 - Pearson r) / 2
        #   else -> (1 - Spearman coefficient) / 2
        # Returns the orange.SymMatrix (with the attributes attached as
        # 'items'), or None when self.data is falsy or when one of the error
        # conditions reported through self.error(0, ...) below is hit.
        # NOTE(review): self.error(0) with no message presumably clears a
        # previously shown error -- confirm against the widget base class.
        self.error(0)
        if self.data:
            atts = self.data.domain.attributes
            if len(atts) < 2:
                self.error(0, "Dataset must contain at least two attributes")
                return None
            matrix = orange.SymMatrix(len(atts))
            matrix.setattr('items', atts)
            if self.classInteractions < 3:
                # The interaction-based measures work on discretized data;
                # the discretized table is cached in self.discretizedData so
                # it is computed only while that attribute is None.
                if self.data.domain.hasContinuousAttributes():
                    if self.discretizedData is None:
                        try:
                            self.discretizedData = orange.Preprocessor_discretize(self.data, method=orange.EquiNDiscretization(numberOfIntervals=4))
                        except orange.KernelException, ex:
                            self.error(0, "An error ocured during data discretization: %s" % ex.message)
                            return None
                    data = self.discretizedData
                else:
                    data = self.data

                # This is ugly, but: Aleks' code which computes Chi2 requires the class attribute because it prepares
                # some common stuff for all measures. If we want to use his code, we need the class variable, so we
                # prepare a fake one
                if not data.domain.classVar:
                    if self.classInteractions == 0:
                        classedDomain = orange.Domain(data.domain.attributes, orange.EnumVariable("foo", values=["0", "1"]))
                        data = orange.ExampleTable(classedDomain, data)
                    else:
                        # Only the Chi2 measure (0) gets a fake class; the
                        # other measures in this branch report an error.
                        self.error(0, "The selected distance measure requires a data set with a class attribute")
                        return None

                im = orngInteract.InteractionMatrix(data, dependencies_too=1)
                # 'off' is the row shift applied when copying 'diss' into
                # 'matrix': 0 for the Chi2 export, 1 for the other two.
                off = 1
                if self.classInteractions == 0:
                    diss,labels = im.exportChi2Matrix()
                    off = 0
                elif self.classInteractions == 1:
                    (diss,labels) = im.depExportDissimilarityMatrix(jaccard=1)  # 2-interactions
                else:
                    (diss,labels) = im.exportDissimilarityMatrix(jaccard=1)  # 3-interactions

                # Copy entries diss[i][0..i] into row i+off of the matrix.
                for i in range(len(atts)-off):
                    for j in range(i+1):
                        matrix[i+off, j] = diss[i][j]

            else:
                if self.classInteractions == 3:
                    # Pearson: map r to (1 - r) / 2 for each attribute pair
                    # below the diagonal.
                    for a1 in range(len(atts)):
                        for a2 in range(a1):
                            matrix[a1, a2] = (1.0 - orange.PearsonCorrelation(a1, a2, self.data, 0).r) / 2.0
                else:
                    if len(self.data) < 3:
                        self.error(0, "The selected distance measure requires a data set with at least 3 instances")
                        return None
                    import numpy, statc
                    m = self.data.toNumpyMA("A")[0]
                    # Replace masked (missing) entries of each column with
                    # the column average before computing the correlation.
                    averages = numpy.ma.average(m, axis=0)
                    filleds = [list(numpy.ma.filled(m[:,i], averages[i])) for i in range(len(atts))]
                    # Spearman: map the first element of statc.spearmanr's
                    # result to (1 - value) / 2 for each pair below the diagonal.
                    for a1, f1 in enumerate(filleds):
                        for a2 in range(a1):
                            matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0
                
            return matrix