示例#1
0
    def generateViews(self):
        """Build the per-column features of this table, then its chart views.

        The method works in two passes:

        1. One ``Features`` object per column is appended to ``self.features``.
           Numerical columns (and temporal ones for untransformed tables) get
           ``min``/``max``/``minmin``; categorical and temporal columns get
           ``distinct``/``ratio``.  A column that carries no information
           (``min == max``, a single distinct value, or identical data in
           every class block) is retyped to ``Type.none`` both in
           ``self.types`` and on the feature itself.
        2. ``View`` objects are appended to ``self.views`` and counted in
           ``self.view_num``.  ``self.describe2`` and ``self.classify_id``
           select between plain two-column views, multi-series views and
           per-class scatter views.

        Finally ``self.view_num`` is added to ``self.instance.view_num``.
        """
        # Column-major copy of the row-major data.  Built as a real list of
        # lists (not the lazy ``map`` object Python 3 returns) so that columns
        # can be indexed and sliced on both Python 2 and Python 3.
        T = [list(column) for column in zip(*self.D)]

        if self.transformed:
            for column_id in range(self.column_num):
                f = Features(self.names[column_id], self.types[column_id],
                             self.origins[column_id])

                # min/max for numerical columns
                if f.type == Type.numerical:
                    column = T[column_id]
                    if self.classify_num == 1 or not self.describe2:
                        # Not categorized, or categorized scatter: one global
                        # range for the whole column.
                        f.min, f.max = min(column), max(column)
                        f.minmin = f.min
                        uninformative = f.min == f.max
                    else:
                        # The column is read as ``classify_num`` consecutive
                        # blocks of ``delta`` rows; min/max are kept per block.
                        # ``//`` keeps the slice bounds integral on Python 3.
                        delta = self.tuple_num // self.classify_num
                        blocks = [column[class_id * delta:(class_id + 1) * delta]
                                  for class_id in range(self.classify_num)]
                        f.min = [min(block) for block in blocks]
                        f.minmin = min(f.min)
                        f.max = [max(block) for block in blocks]
                        if sum(hi - lo for lo, hi in zip(f.min, f.max)) == 0:
                            # Every block is constant.
                            uninformative = True
                        elif min(f.min) == max(f.min) and min(f.max) == max(f.max):
                            # Same range everywhere: drop the column only when
                            # all blocks hold exactly the same values.
                            uninformative = all(
                                blocks[class_id] == blocks[class_id + 1]
                                for class_id in range(self.classify_num - 1))
                        else:
                            uninformative = False

                    if uninformative:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue

                # distinct/ratio for categorical and temporal columns: this
                # branch assigns one distinct value per row without counting.
                if f.type == Type.categorical or f.type == Type.temporal:
                    f.distinct = self.tuple_num
                    f.ratio = 1.0

                self.features.append(f)
        else:
            for column_id in range(self.column_num):
                f = Features(self.names[column_id], self.types[column_id],
                             self.origins[column_id])

                # min/max for numerical and temporal columns
                if f.type == Type.numerical or f.type == Type.temporal:
                    f.min, f.max = min(T[column_id]), max(T[column_id])
                    f.minmin = f.min
                    if f.min == f.max:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue

                # distinct/ratio for categorical and temporal columns
                if f.type == Type.categorical or f.type == Type.temporal:
                    counts = {}
                    for row_id in range(self.tuple_num):
                        value = self.D[row_id][column_id]
                        counts[value] = counts.get(value, 0) + 1
                    f.distinct = len(counts)
                    if f.distinct == 1:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue
                    # ``1.0 *`` forces float division on Python 2 as well.
                    f.ratio = 1.0 * f.distinct / self.tuple_num
                    f.distinct_values = [(key, counts[key]) for key in sorted(counts)]
                    if f.type == Type.temporal:
                        self.getIntervalBins(f)

                self.features.append(f)

        if self.describe2 == '' and self.classify_id == -1:
            # 2D views: every ordered pair of distinct columns (x=i, y=j).
            for i in range(self.column_num):
                for j in range(self.column_num):
                    if i == j:
                        continue

                    fi = self.features[i]
                    fj = self.features[j]
                    charts = []
                    if (fi.type == Type.categorical and fj.type == Type.numerical
                            and fi.ratio == 1.0):
                        # Pie only for strictly positive values, at most five
                        # categories, and never for an ``AVG(...)`` column.
                        if (fj.minmin > 0 and fi.distinct <= 5
                                and not (len(fj.name) >= 6
                                         and fj.name.startswith('AVG(')
                                         and fj.name.endswith(')'))):
                            charts.append(Chart.pie)
                        if fi.distinct <= 20:
                            charts.append(Chart.bar)
                    elif (fi.type == Type.temporal and fj.type == Type.numerical
                          and fi.ratio == 1.0):
                        # Fewer than seven points: bar; otherwise line.
                        if fi.distinct < 7:
                            charts.append(Chart.bar)
                        else:
                            charts.append(Chart.line)
                    elif ((not self.transformed) and fi.type == Type.numerical
                          and fj.type == Type.numerical and i < j):
                        # ``i < j`` emits each unordered pair only once.
                        charts.append(Chart.scatter)

                    for chart in charts:
                        v = View(self, i, j, -1, 1, [T[i]], [T[j]], chart)
                        self.views.append(v)
                        self.view_num += 1
        elif self.describe2:
            # 3D views: one series per class, all sharing the x values of the
            # first class block.
            for i in range(self.column_num):
                for j in range(self.column_num):
                    fi = self.features[i]
                    fj = self.features[j]
                    if (fi.type == Type.categorical and fj.type == Type.numerical
                            and fj.minmin > 0):
                        charts = [Chart.bar]
                    elif fi.type == Type.temporal and fj.type == Type.numerical:
                        if self.tuple_num // self.classify_num < 7:
                            charts = [Chart.bar]
                        else:
                            charts = [Chart.line]
                    else:
                        charts = []
                    for chart in charts:
                        delta = self.tuple_num // self.classify_num
                        series_data = [T[j][series * delta:(series + 1) * delta]
                                       for series in range(self.classify_num)]
                        v = View(self, i, j, self.classify_id, self.classify_num,
                                 [T[i][0:delta]], series_data, chart)
                        self.views.append(v)
                        self.view_num += 1
        else:
            # Categorized scatter: one (x, y) series per class for every
            # unordered pair of numerical columns.
            for i in range(self.column_num):
                for j in range(self.column_num):
                    if (i >= j or self.types[i] != Type.numerical
                            or self.types[j] != Type.numerical):
                        continue
                    X = []
                    Y = []
                    offset = 0
                    for k in range(self.classify_num):
                        # ``self.classes[k][1]`` is used as the row count of
                        # class k; classes are laid out consecutively.
                        size = self.classes[k][1]
                        X.append(T[i][offset:offset + size])
                        Y.append(T[j][offset:offset + size])
                        offset += size
                    v = View(self, i, j, self.classify_id, self.classify_num,
                             X, Y, Chart.scatter)
                    self.views.append(v)
                    self.view_num += 1

        self.instance.view_num += self.view_num
示例#2
0
    def getFeatures(self):
        """Compute one ``Features`` object per column into ``self.features``.

        ``self.T`` is set to the column-major copy of ``self.D``.  Numerical
        columns (and temporal ones for untransformed tables) get ``min``,
        ``max`` and ``minmin``; categorical and temporal columns get
        ``distinct`` and ``ratio`` (plus ``distinct_values`` for untransformed
        tables).  A numerical/temporal column whose values never vary is
        retyped to ``Type.none`` in both ``self.types`` and the feature.
        """
        # A real list of lists (not the lazy ``map`` object Python 3 returns)
        # so that ``self.T`` can be indexed and sliced on Python 2 and 3 alike.
        self.T = [list(column) for column in zip(*self.D)]

        if self.transformed:
            for column_id in range(self.column_num):
                f = Features(self.names[column_id], self.types[column_id],
                             self.origins[column_id])

                # min/max for numerical columns
                if f.type == Type.numerical:
                    column = self.T[column_id]
                    if self.classify_num == 1 or not self.describe2:
                        # Not categorized, or categorized scatter: one global
                        # range for the whole column.
                        f.min, f.max = min(column), max(column)
                        f.minmin = f.min
                        uninformative = f.min == f.max
                    else:
                        # The column is read as ``classify_num`` consecutive
                        # blocks of ``delta`` rows; min/max are kept per block.
                        # ``//`` keeps the slice bounds integral on Python 3.
                        delta = self.tuple_num // self.classify_num
                        blocks = [column[class_id * delta:(class_id + 1) * delta]
                                  for class_id in range(self.classify_num)]
                        f.min = [min(block) for block in blocks]
                        f.minmin = min(f.min)
                        f.max = [max(block) for block in blocks]
                        if sum(hi - lo for lo, hi in zip(f.min, f.max)) == 0:
                            # Every block is constant.
                            uninformative = True
                        elif min(f.min) == max(f.min) and min(f.max) == max(f.max):
                            # Same range everywhere: drop the column only when
                            # all blocks hold exactly the same values.
                            uninformative = all(
                                blocks[class_id] == blocks[class_id + 1]
                                for class_id in range(self.classify_num - 1))
                        else:
                            uninformative = False

                    if uninformative:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue

                # distinct/ratio for categorical and temporal columns: this
                # branch assigns one distinct value per row without counting.
                if f.type == Type.categorical or f.type == Type.temporal:
                    f.distinct = self.tuple_num
                    f.ratio = 1.0

                self.features.append(f)
        else:
            for column_id in range(self.column_num):
                f = Features(self.names[column_id], self.types[column_id],
                             self.origins[column_id])

                # min/max for numerical and temporal columns
                if f.type == Type.numerical or f.type == Type.temporal:
                    f.min, f.max = min(self.T[column_id]), max(self.T[column_id])
                    f.minmin = f.min
                    if f.min == f.max:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue

                # distinct/ratio for categorical and temporal columns
                if f.type == Type.categorical or f.type == Type.temporal:
                    counts = {}
                    for row_id in range(self.tuple_num):
                        value = self.D[row_id][column_id]
                        counts[value] = counts.get(value, 0) + 1
                    f.distinct = len(counts)
                    # ``1.0 *`` forces float division on Python 2 as well.
                    f.ratio = 1.0 * f.distinct / self.tuple_num
                    f.distinct_values = [(key, counts[key]) for key in sorted(counts)]
                    if f.type == Type.temporal:
                        self.getIntervalBins(f)

                self.features.append(f)
示例#3
0
def which_chart(tables,table_num):
    """Pick a single "count" view for each table from index 1 onwards.

    For every table ``tables[i]`` with ``1 <= i < table_num`` (index 0 is
    skipped):

    * if the table is transformed, one ``Features`` object per column is
      appended to ``table.features`` (min/max for numerical columns,
      distinct/ratio for categorical and temporal ones); otherwise only an
      error message is printed;
    * every column pair ``(j, k)`` where column ``j`` is named ``CNT(...)``
      and column ``k`` has ``ratio == 1.0`` yields candidate ``View`` objects
      (x=k, y=j) whose chart types depend on the type of column ``k``;
    * after each batch of candidates ``table.countviews`` is reduced to the
      single view with the largest ``M`` (the earlier view wins ties).

    :param tables: indexable collection of table objects, mutated in place.
    :param table_num: exclusive upper bound of the table indices to process.
    :return: the ``tables`` argument itself.
    """
    for i in range(1, table_num):
        table = tables[i]  # type: Table
        # Column-major copy of the row-major data.  Built as a real list of
        # lists (not the lazy ``map`` object Python 3 returns) so that columns
        # can be indexed and sliced on both Python 2 and Python 3.
        T = [list(column) for column in zip(*table.D)]

        if table.transformed:
            for column_id in range(table.column_num):
                f = Features(table.names[column_id], table.types[column_id],
                             table.origins[column_id])

                # min/max for numerical columns
                if f.type == Type.numerical:
                    column = T[column_id]
                    if table.classify_num == 1 or not table.describe2:
                        # Not categorized, or categorized scatter: one global
                        # range for the whole column.
                        f.min, f.max = min(column), max(column)
                        f.minmin = f.min
                        uninformative = f.min == f.max
                    else:
                        # The column is read as ``classify_num`` consecutive
                        # blocks of ``delta`` rows; min/max are kept per block.
                        # ``//`` keeps the slice bounds integral on Python 3.
                        delta = table.tuple_num // table.classify_num
                        blocks = [column[class_id * delta:(class_id + 1) * delta]
                                  for class_id in range(table.classify_num)]
                        f.min = [min(block) for block in blocks]
                        f.minmin = min(f.min)
                        f.max = [max(block) for block in blocks]
                        if sum(hi - lo for lo, hi in zip(f.min, f.max)) == 0:
                            # Every block is constant.
                            uninformative = True
                        elif min(f.min) == max(f.min) and min(f.max) == max(f.max):
                            # Same range everywhere: drop the column only when
                            # all blocks hold exactly the same values.
                            uninformative = all(
                                blocks[class_id] == blocks[class_id + 1]
                                for class_id in range(table.classify_num - 1))
                        else:
                            uninformative = False

                    if uninformative:
                        table.types[column_id] = f.type = Type.none
                        table.features.append(f)
                        continue

                # distinct/ratio for categorical and temporal columns: this
                # branch assigns one distinct value per row without counting.
                if f.type == Type.categorical or f.type == Type.temporal:
                    f.distinct = table.tuple_num
                    f.ratio = 1.0

                table.features.append(f)
        else:
            # Parenthesised single argument: prints the same text whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            # NOTE(review): processing still continues below and relies on
            # whatever ``table.features`` already holds -- confirm intended.
            print("error:table"+str(i)+"not transformed")

        for j in range(table.column_num):
            for k in range(table.column_num):
                fj = table.features[j]
                fk = table.features[k]

                # Start from an empty list on every pair so that a pair whose
                # x column has an unsupported type can never reuse the chart
                # list left over from the previous pair.
                charts = []
                if fj.name.startswith('CNT(') and fj.name.endswith(')') and fk.ratio == 1.0:
                    if fk.type == Type.numerical:
                        charts = [Chart.scatter, Chart.bar, Chart.pie]
                    elif fk.type == Type.categorical:
                        charts = [Chart.bar, Chart.pie]
                    elif fk.type == Type.temporal:
                        charts = [Chart.bar, Chart.line]

                for chart in charts:
                    v = View(table, k, j, -1, 1, [T[k]], [T[j]], chart)
                    table.countviews.append(v)
                    table.countview_num += 1

                if charts:
                    # Pairwise knockout between the first two views until one
                    # is left: the smaller ``M`` is dropped, the first view
                    # survives a tie.
                    while table.countview_num > 1:
                        if table.countviews[0].M < table.countviews[1].M:
                            del table.countviews[0]
                        else:
                            del table.countviews[1]
                        table.countview_num -= 1
    return tables
示例#4
0
    def generateViews(self):
        """Build the per-column features of this table, then its chart views.

        The method works in two passes:

        1. One ``Features`` object per column is appended to ``self.features``.
           Numerical columns (and temporal ones for untransformed tables) get
           ``min``/``max``; categorical and temporal columns get
           ``distinct``/``ratio``.  A numerical/temporal column with
           ``min == max`` is retyped to ``Type.none`` both in ``self.types``
           and on the feature itself.
        2. ``View`` objects are appended to ``self.views`` and counted in
           ``self.view_num``.  ``self.describe2`` and ``self.classify_id``
           select between plain two-column views, multi-series views and
           per-class scatter views.

        Finally ``self.view_num`` is added to ``self.instance.view_num``.
        """
        # Column-major copy of the row-major data (``*`` unzips the rows).
        # Built as a real list of lists (not the lazy ``map`` object Python 3
        # returns) so that columns can be indexed and sliced on Python 2 and 3.
        T = [list(column) for column in zip(*self.D)]

        if self.transformed:
            for column_id in range(self.column_num):
                f = Features(self.names[column_id], self.types[column_id],
                             self.origins[column_id])

                # min/max for numerical columns
                if f.type == Type.numerical:
                    f.min, f.max = min(T[column_id]), max(T[column_id])
                    if f.min == f.max:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue

                # distinct/ratio for categorical and temporal columns: this
                # branch assigns one distinct value per row without counting.
                if f.type == Type.categorical or f.type == Type.temporal:
                    f.distinct = self.tuple_num
                    f.ratio = 1.0

                self.features.append(f)
        else:
            for column_id in range(self.column_num):
                f = Features(self.names[column_id], self.types[column_id],
                             self.origins[column_id])

                # min/max for numerical and temporal columns
                if f.type == Type.numerical or f.type == Type.temporal:
                    f.min, f.max = min(T[column_id]), max(T[column_id])
                    if f.min == f.max:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue

                # distinct/ratio for categorical and temporal columns
                if f.type == Type.categorical or f.type == Type.temporal:
                    counts = {}
                    for row_id in range(self.tuple_num):
                        value = self.D[row_id][column_id]
                        counts[value] = counts.get(value, 0) + 1
                    f.distinct = len(counts)
                    # ``1.0 *`` forces float division on Python 2 as well.
                    f.ratio = 1.0 * f.distinct / self.tuple_num
                    f.distinct_values = [(key, counts[key]) for key in sorted(counts)]
                    if f.type == Type.temporal:
                        self.getIntervalBins(f)

                self.features.append(f)

        if self.describe2 == '' and self.classify_id == -1:
            # 2D views: every ordered pair of distinct columns (x=i, y=j).
            for i in range(self.column_num):
                for j in range(self.column_num):
                    if i == j:
                        continue

                    fi = self.features[i]
                    fj = self.features[j]
                    charts = []
                    if (fi.type == Type.categorical and fj.type == Type.numerical
                            and fi.ratio == 1.0):
                        # Pie only for strictly positive values, at most five
                        # categories, and never for an ``AVG(...)`` column
                        # (AVG makes no sense in a pie chart).
                        if (fj.min > 0 and fi.distinct <= 5
                                and not (len(fj.name) >= 6
                                         and fj.name.startswith('AVG(')
                                         and fj.name.endswith(')'))):
                            charts.append(Chart.pie)
                        if fi.distinct <= 20:
                            charts.append(Chart.bar)
                    elif (fi.type == Type.temporal and fj.type == Type.numerical
                          and fi.ratio == 1.0):
                        # Fewer than seven points: bar; otherwise line.  (A
                        # pie option for this case existed earlier as disabled
                        # code and is not offered.)
                        if fi.distinct < 7:
                            charts.append(Chart.bar)
                        else:
                            charts.append(Chart.line)
                    elif ((not self.transformed) and fi.type == Type.numerical
                          and fj.type == Type.numerical and i < j):
                        # ``i < j`` emits each unordered pair only once.
                        charts.append(Chart.scatter)

                    for chart in charts:
                        v = View(self, i, j, -1, 1, [T[i]], [T[j]], chart)
                        self.views.append(v)
                        self.view_num += 1
        elif self.describe2:
            # 3D views: one series per class, all sharing the x values of the
            # first class block.
            for i in range(self.column_num):
                for j in range(self.column_num):
                    fi = self.features[i]
                    fj = self.features[j]
                    if (fi.type == Type.categorical and fj.type == Type.numerical
                            and fj.min > 0):
                        charts = [Chart.bar]
                    elif fi.type == Type.temporal and fj.type == Type.numerical:
                        # ``//`` keeps the quotient integral on Python 3.
                        if self.tuple_num // self.classify_num < 7:
                            charts = [Chart.bar]
                        else:
                            charts = [Chart.line]
                    else:
                        charts = []
                    for chart in charts:
                        delta = self.tuple_num // self.classify_num
                        series_data = [T[j][series * delta:(series + 1) * delta]
                                       for series in range(self.classify_num)]
                        v = View(self, i, j, self.classify_id, self.classify_num,
                                 [T[i][0:delta]], series_data, chart)
                        self.views.append(v)
                        self.view_num += 1
        else:
            # Categorized scatter: one (x, y) series per class for every
            # unordered pair of numerical columns.
            for i in range(self.column_num):
                for j in range(self.column_num):
                    if (i >= j or self.types[i] != Type.numerical
                            or self.types[j] != Type.numerical):
                        continue
                    X = []
                    Y = []
                    offset = 0
                    for k in range(self.classify_num):
                        # ``self.classes[k][1]`` is used as the row count of
                        # class k; classes are laid out consecutively.
                        size = self.classes[k][1]
                        X.append(T[i][offset:offset + size])
                        Y.append(T[j][offset:offset + size])
                        offset += size
                    v = View(self, i, j, self.classify_id, self.classify_num,
                             X, Y, Chart.scatter)
                    self.views.append(v)
                    self.view_num += 1

        self.instance.view_num += self.view_num