def generateViews(self):
    """Compute one ``Features`` record per column, then enumerate the views.

    Exactly one feature per column is appended to ``self.features``, in
    column order.  A column that turns out to be constant is re-typed to
    ``Type.none`` in both ``self.types`` and its feature:

    * transformed tables: a numerical column whose min equals its max (or,
      when split per class, whose classes are all constant / all identical);
    * untransformed tables: a numerical/temporal column whose min equals its
      max, or a categorical/temporal column with one distinct value.

    Views are appended to ``self.views`` and counted in ``self.view_num``:

    * ``describe2 == ''`` and ``classify_id == -1``: two-column views
      (pie/bar for categorical x numerical, bar/line for temporal x
      numerical, scatter for numerical pairs of untransformed tables);
    * ``describe2`` truthy: bar/line views with one data series per class,
      each series being ``tuple_num // classify_num`` consecutive rows;
    * otherwise: scatter views of numerical column pairs, split into
      consecutive runs whose lengths are read from ``self.classes[k][1]``.

    Finally ``self.view_num`` is added to ``self.instance.view_num``.

    Returns:
        None
    """
    # Column-major copy of the rows.  It has to be a real list of lists: the
    # columns are indexed and sliced below, which the lazy ``map`` object of
    # Python 3 does not support.
    T = [list(column) for column in zip(*self.D)]

    if self.transformed:
        for column_id in range(self.column_num):
            f = Features(self.names[column_id], self.types[column_id],
                         self.origins[column_id])

            # calculate min,max for numerical
            if f.type == Type.numerical:
                column = T[column_id]
                if self.classify_num == 1 or not self.describe2:
                    # not categorized or categorized scatter
                    f.min, f.max = min(column), max(column)
                    f.minmin = f.min
                    if f.min == f.max:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue
                else:
                    # Floor division keeps the slice bounds integral on
                    # Python 3 too (same result as ``/`` on Python 2 ints).
                    delta = self.tuple_num // self.classify_num
                    groups = [column[class_id * delta:(class_id + 1) * delta]
                              for class_id in range(self.classify_num)]
                    f.min = [min(group) for group in groups]
                    f.minmin = min(f.min)
                    f.max = [max(group) for group in groups]
                    # The per-class ranges add up to zero.
                    if sum(hi - lo for lo, hi in zip(f.min, f.max)) == 0:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue
                    # All classes share the same extremes and every class
                    # holds the same data as the next one.
                    if min(f.min) == max(f.min) and min(f.max) == max(f.max):
                        if all(groups[class_id] == groups[class_id + 1]
                               for class_id in range(len(groups) - 1)):
                            self.types[column_id] = f.type = Type.none
                            self.features.append(f)
                            continue

            # calculate distinct,ratio for categorical,temporal
            if f.type == Type.categorical or f.type == Type.temporal:
                f.distinct = self.tuple_num
                f.ratio = 1.0

            self.features.append(f)
    else:
        for column_id in range(self.column_num):
            f = Features(self.names[column_id], self.types[column_id],
                         self.origins[column_id])

            # calculate min,max for numerical,temporal
            if f.type == Type.numerical or f.type == Type.temporal:
                f.min, f.max = min(T[column_id]), max(T[column_id])
                f.minmin = f.min
                if f.min == f.max:
                    self.types[column_id] = f.type = Type.none
                    self.features.append(f)
                    continue

            # calculate distinct,ratio for categorical,temporal
            if f.type == Type.categorical or f.type == Type.temporal:
                d = {}  # value -> number of rows holding it
                for row_id in range(self.tuple_num):
                    value = self.D[row_id][column_id]
                    d[value] = d.get(value, 0) + 1
                f.distinct = len(d)
                if f.distinct == 1:
                    self.types[column_id] = f.type = Type.none
                    self.features.append(f)
                    continue
                f.ratio = 1.0 * f.distinct / self.tuple_num
                f.distinct_values = [(k, d[k]) for k in sorted(d)]
                if f.type == Type.temporal:
                    self.getIntervalBins(f)

            self.features.append(f)

    if self.describe2 == '' and self.classify_id == -1:
        # generate 2D views
        for i in range(self.column_num):
            fi = self.features[i]
            for j in range(self.column_num):
                if i == j:
                    continue
                fj = self.features[j]
                if (fi.type == Type.categorical and fj.type == Type.numerical
                        and fi.ratio == 1.0):
                    charts = []
                    # No pie chart for columns named ``AVG(...)``.
                    if (fj.minmin > 0 and fi.distinct <= 5
                            and not (len(fj.name) >= 6
                                     and fj.name[0:4] == 'AVG('
                                     and fj.name[-1] == ')')):
                        charts.append(Chart.pie)
                    if fi.distinct <= 20:
                        charts.append(Chart.bar)
                elif (fi.type == Type.temporal and fj.type == Type.numerical
                      and fi.ratio == 1.0):
                    charts = [Chart.bar] if fi.distinct < 7 else [Chart.line]
                elif ((not self.transformed) and fi.type == Type.numerical
                      and fj.type == Type.numerical and i < j):
                    charts = [Chart.scatter]
                else:
                    charts = []
                for chart in charts:
                    v = View(self, i, j, -1, 1, [T[i]], [T[j]], chart)
                    self.views.append(v)
                    self.view_num += 1
    elif self.describe2:
        # generate 3D views
        for i in range(self.column_num):
            fi = self.features[i]
            for j in range(self.column_num):
                fj = self.features[j]
                if (fi.type == Type.categorical and fj.type == Type.numerical
                        and fj.minmin > 0):
                    charts = [Chart.bar]
                elif fi.type == Type.temporal and fj.type == Type.numerical:
                    if self.tuple_num // self.classify_num < 7:
                        charts = [Chart.bar]
                    else:
                        charts = [Chart.line]
                else:
                    charts = []
                for chart in charts:
                    # Computed only when a view is built, so tables without
                    # any matching column pair never divide.
                    delta = self.tuple_num // self.classify_num
                    series_data = [T[j][series * delta:(series + 1) * delta]
                                   for series in range(self.classify_num)]
                    v = View(self, i, j, self.classify_id, self.classify_num,
                             [T[i][0:delta]], series_data, chart)
                    self.views.append(v)
                    self.view_num += 1
    else:
        # Scatter views of numerical pairs, one (x, y) series per class.
        for i in range(self.column_num):
            for j in range(self.column_num):
                if (i >= j or self.types[i] != Type.numerical
                        or self.types[j] != Type.numerical):
                    continue
                X = []
                Y = []
                start = 0  # first row of the current class
                for k in range(self.classify_num):
                    size = self.classes[k][1]
                    X.append(T[i][start:start + size])
                    Y.append(T[j][start:start + size])
                    start += size
                v = View(self, i, j, self.classify_id, self.classify_num,
                         X, Y, Chart.scatter)
                self.views.append(v)
                self.view_num += 1

    self.instance.view_num += self.view_num
def getFeatures(self):
    """Compute one ``Features`` record per column and cache the columns.

    ``self.T`` is set to the column-major copy of ``self.D`` and exactly one
    feature per column is appended to ``self.features``, in column order.

    * Transformed tables: numerical columns get ``min``/``max``/``minmin``
      (scalars, or one value per class when ``describe2`` is set and
      ``classify_num != 1``); categorical and temporal columns get
      ``distinct = tuple_num`` and ``ratio = 1.0``.
    * Untransformed tables: numerical and temporal columns get
      ``min``/``max``/``minmin``; categorical and temporal columns get
      ``distinct``, ``ratio`` and the sorted ``distinct_values`` counts, and
      temporal columns are additionally passed to ``self.getIntervalBins``.

    A numerical (or, when untransformed, temporal) column found to be
    constant is re-typed to ``Type.none`` in ``self.types`` and its feature.

    Returns:
        None
    """
    # A real list of lists: the columns are indexed and sliced below, which
    # the lazy ``map`` object of Python 3 does not support.
    self.T = [list(column) for column in zip(*self.D)]

    if self.transformed:
        for column_id in range(self.column_num):
            f = Features(self.names[column_id], self.types[column_id],
                         self.origins[column_id])

            # calculate min,max for numerical
            if f.type == Type.numerical:
                column = self.T[column_id]
                if self.classify_num == 1 or not self.describe2:
                    # not categorized or categorized scatter
                    f.min, f.max = min(column), max(column)
                    f.minmin = f.min
                    if f.min == f.max:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue
                else:
                    # Floor division keeps the slice bounds integral on
                    # Python 3 too (same result as ``/`` on Python 2 ints).
                    delta = self.tuple_num // self.classify_num
                    groups = [column[class_id * delta:(class_id + 1) * delta]
                              for class_id in range(self.classify_num)]
                    f.min = [min(group) for group in groups]
                    f.minmin = min(f.min)
                    f.max = [max(group) for group in groups]
                    # The per-class ranges add up to zero.
                    if sum(hi - lo for lo, hi in zip(f.min, f.max)) == 0:
                        self.types[column_id] = f.type = Type.none
                        self.features.append(f)
                        continue
                    # All classes share the same extremes and every class
                    # holds the same data as the next one.
                    if min(f.min) == max(f.min) and min(f.max) == max(f.max):
                        if all(groups[class_id] == groups[class_id + 1]
                               for class_id in range(len(groups) - 1)):
                            self.types[column_id] = f.type = Type.none
                            self.features.append(f)
                            continue

            # calculate distinct,ratio for categorical,temporal
            if f.type == Type.categorical or f.type == Type.temporal:
                f.distinct = self.tuple_num
                f.ratio = 1.0

            self.features.append(f)
    else:
        for column_id in range(self.column_num):
            f = Features(self.names[column_id], self.types[column_id],
                         self.origins[column_id])

            # calculate min,max for numerical,temporal
            if f.type == Type.numerical or f.type == Type.temporal:
                f.min, f.max = min(self.T[column_id]), max(self.T[column_id])
                f.minmin = f.min
                if f.min == f.max:
                    self.types[column_id] = f.type = Type.none
                    self.features.append(f)
                    continue

            # calculate distinct,ratio for categorical,temporal
            if f.type == Type.categorical or f.type == Type.temporal:
                d = {}  # value -> number of rows holding it
                for row_id in range(self.tuple_num):
                    value = self.D[row_id][column_id]
                    d[value] = d.get(value, 0) + 1
                f.distinct = len(d)
                f.ratio = 1.0 * f.distinct / self.tuple_num
                f.distinct_values = [(k, d[k]) for k in sorted(d)]
                if f.type == Type.temporal:
                    self.getIntervalBins(f)

            self.features.append(f)
def which_chart(tables, table_num):
    """Pick, per table, the best chart for its ``CNT(...)`` columns.

    Tables at indexes ``1 .. table_num - 1`` are processed; index 0 is
    skipped.  For each transformed table one ``Features`` record per column
    is appended to ``table.features``; for an untransformed table an error
    line is printed instead and no features are computed here.

    Every ordered column pair ``(j, k)`` where column ``j`` is named
    ``CNT(...)`` and column ``k`` has ``ratio == 1.0`` yields candidate views
    (x = column ``k``, y = column ``j``) depending on the type of ``k``:
    numerical -> scatter/bar/pie, categorical -> bar/pie, temporal ->
    bar/line.  After each such pair ``table.countviews`` is reduced to the
    single view with the largest ``M`` (the earlier view wins ties) and
    ``table.countview_num`` ends at 1.

    Args:
        tables: indexable collection of table objects.
        table_num: exclusive upper bound of the table indexes to process.

    Returns:
        The ``tables`` object that was passed in.
    """
    # NOTE(review): the source reached review with its indentation collapsed;
    # the nesting of the per-pair reduction below was reconstructed from the
    # statement order -- confirm against the original file.
    for i in range(1, table_num):
        table = tables[i]  # type: Table
        # Real list of lists: columns are indexed and sliced below, which the
        # lazy ``map`` object of Python 3 does not support.
        T = [list(column) for column in zip(*table.D)]

        if table.transformed:
            for column_id in range(table.column_num):
                f = Features(table.names[column_id], table.types[column_id],
                             table.origins[column_id])

                # calculate min,max for numerical
                if f.type == Type.numerical:
                    column = T[column_id]
                    if table.classify_num == 1 or not table.describe2:
                        # not categorized or categorized scatter
                        f.min, f.max = min(column), max(column)
                        f.minmin = f.min
                        if f.min == f.max:
                            table.types[column_id] = f.type = Type.none
                            table.features.append(f)
                            continue
                    else:
                        # Floor division keeps the slice bounds integral on
                        # Python 3 too (same result as ``/`` on Python 2).
                        delta = table.tuple_num // table.classify_num
                        groups = [
                            column[class_id * delta:(class_id + 1) * delta]
                            for class_id in range(table.classify_num)
                        ]
                        f.min = [min(group) for group in groups]
                        f.minmin = min(f.min)
                        f.max = [max(group) for group in groups]
                        # The per-class ranges add up to zero.
                        if sum(hi - lo for lo, hi in zip(f.min, f.max)) == 0:
                            table.types[column_id] = f.type = Type.none
                            table.features.append(f)
                            continue
                        # All classes share the same extremes and every class
                        # holds the same data as the next one.
                        if (min(f.min) == max(f.min)
                                and min(f.max) == max(f.max)):
                            if all(groups[class_id] == groups[class_id + 1]
                                   for class_id in range(len(groups) - 1)):
                                table.types[column_id] = f.type = Type.none
                                table.features.append(f)
                                continue

                # calculate distinct,ratio for categorical,temporal
                if f.type == Type.categorical or f.type == Type.temporal:
                    f.distinct = table.tuple_num
                    f.ratio = 1.0

                table.features.append(f)
        else:
            # Single-argument call form: prints the same text on Python 2
            # and is valid syntax on Python 3.
            print("error:table" + str(i) + "not transformed")

        for j in range(table.column_num):
            fj = table.features[j]
            is_count = fj.name[0:4] == 'CNT(' and fj.name[-1] == ')'
            for k in range(table.column_num):
                fk = table.features[k]
                # Start every pair from an empty list so that charts chosen
                # for a previous pair can never be reused by mistake.
                charts = []
                if is_count and fk.ratio == 1.0:
                    if fk.type == Type.numerical:
                        charts = [Chart.scatter, Chart.bar, Chart.pie]
                    elif fk.type == Type.categorical:
                        charts = [Chart.bar, Chart.pie]
                    elif fk.type == Type.temporal:
                        charts = [Chart.bar, Chart.line]

                for chart in charts:
                    v = View(table, k, j, -1, 1, [T[k]], [T[j]], chart)
                    table.countviews.append(v)
                    table.countview_num += 1

                if charts:
                    # Knock-out between the two oldest views until one is
                    # left.  The original also tested the truthiness of
                    # countviews[1] first; a falsy view would have looped
                    # forever because nothing was removed, so every round
                    # now always drops one view.
                    while table.countview_num != 1:
                        if table.countviews[0].M < table.countviews[1].M:
                            del table.countviews[0]
                        else:
                            del table.countviews[1]
                        table.countview_num -= 1

    return tables
def generateViews(self):
    """Compute one ``Features`` record per column, then enumerate the views.

    Exactly one feature per column is appended to ``self.features``, in
    column order.  Numerical columns (and, for untransformed tables,
    temporal columns) get ``min``/``max``; when the two are equal the column
    is re-typed to ``Type.none`` in ``self.types`` and its feature.
    Categorical and temporal columns get ``distinct``/``ratio`` (fixed to
    ``tuple_num``/``1.0`` for transformed tables, counted otherwise).

    Views are appended to ``self.views`` and counted in ``self.view_num``:

    * ``describe2 == ''`` and ``classify_id == -1``: two-column views
      (pie/bar for categorical x numerical, bar/line for temporal x
      numerical, scatter for numerical pairs of untransformed tables);
    * ``describe2`` truthy: bar/line views with one data series per class,
      each series being ``tuple_num // classify_num`` consecutive rows;
    * otherwise: scatter views of numerical column pairs, split into
      consecutive runs whose lengths are read from ``self.classes[k][1]``.

    Finally ``self.view_num`` is added to ``self.instance.view_num``.

    Returns:
        None
    """
    # Transpose the rows into columns.  It has to be a real list of lists:
    # the columns are indexed and sliced below, which the lazy ``map`` object
    # of Python 3 does not support.
    T = [list(column) for column in zip(*self.D)]

    if self.transformed:
        for column_id in range(self.column_num):
            f = Features(self.names[column_id], self.types[column_id],
                         self.origins[column_id])

            # calculate min,max for numerical
            if f.type == Type.numerical:
                f.min, f.max = min(T[column_id]), max(T[column_id])
                if f.min == f.max:
                    self.types[column_id] = f.type = Type.none
                    self.features.append(f)
                    continue  # directly go to the next column

            # calculate distinct,ratio for categorical,temporal
            if f.type == Type.categorical or f.type == Type.temporal:
                f.distinct = self.tuple_num
                f.ratio = 1.0

            self.features.append(f)
    else:
        for column_id in range(self.column_num):
            f = Features(self.names[column_id], self.types[column_id],
                         self.origins[column_id])

            # calculate min,max for numerical,temporal
            if f.type == Type.numerical or f.type == Type.temporal:
                f.min, f.max = min(T[column_id]), max(T[column_id])
                if f.min == f.max:
                    self.types[column_id] = f.type = Type.none
                    self.features.append(f)
                    continue

            # calculate distinct,ratio for categorical,temporal
            if f.type == Type.categorical or f.type == Type.temporal:
                d = {}  # value -> number of rows holding it
                for row_id in range(self.tuple_num):
                    value = self.D[row_id][column_id]
                    d[value] = d.get(value, 0) + 1
                f.distinct = len(d)
                f.ratio = 1.0 * f.distinct / self.tuple_num
                f.distinct_values = [(k, d[k]) for k in sorted(d)]
                if f.type == Type.temporal:
                    self.getIntervalBins(f)

            self.features.append(f)

    if self.describe2 == '' and self.classify_id == -1:
        # generate 2D views: all combinations of 2 columns except the same
        # column
        for i in range(self.column_num):
            fi = self.features[i]
            for j in range(self.column_num):
                if i == j:
                    continue
                fj = self.features[j]
                if (fi.type == Type.categorical and fj.type == Type.numerical
                        and fi.ratio == 1.0):
                    charts = []
                    # AVG makes no sense in pie chart
                    if (fj.min > 0 and fi.distinct <= 5
                            and not (len(fj.name) >= 6
                                     and fj.name[0:4] == 'AVG('
                                     and fj.name[-1] == ')')):
                        charts.append(Chart.pie)
                    if fi.distinct <= 20:
                        charts.append(Chart.bar)
                elif (fi.type == Type.temporal and fj.type == Type.numerical
                      and fi.ratio == 1.0):
                    # A pie-chart option for temporal axes used to sit here
                    # as disabled code; only bar/line are offered.
                    charts = [Chart.bar] if fi.distinct < 7 else [Chart.line]
                elif ((not self.transformed) and fi.type == Type.numerical
                      and fj.type == Type.numerical and i < j):
                    charts = [Chart.scatter]
                else:
                    charts = []
                for chart in charts:
                    v = View(self, i, j, -1, 1, [T[i]], [T[j]], chart)
                    self.views.append(v)
                    self.view_num += 1
    elif self.describe2:
        # generate 3D views
        for i in range(self.column_num):
            fi = self.features[i]
            for j in range(self.column_num):
                fj = self.features[j]
                if (fi.type == Type.categorical and fj.type == Type.numerical
                        and fj.min > 0):
                    charts = [Chart.bar]
                elif fi.type == Type.temporal and fj.type == Type.numerical:
                    # Floor division: same result as ``/`` on Python 2 ints.
                    if self.tuple_num // self.classify_num < 7:
                        charts = [Chart.bar]
                    else:
                        charts = [Chart.line]
                else:
                    charts = []
                for chart in charts:
                    # Computed only when a view is built, so tables without
                    # any matching column pair never divide.
                    delta = self.tuple_num // self.classify_num
                    series_data = [T[j][series * delta:(series + 1) * delta]
                                   for series in range(self.classify_num)]
                    v = View(self, i, j, self.classify_id, self.classify_num,
                             [T[i][0:delta]], series_data, chart)
                    self.views.append(v)
                    self.view_num += 1
    else:
        # Scatter views of numerical pairs, one (x, y) series per class.
        for i in range(self.column_num):
            for j in range(self.column_num):
                if (i >= j or self.types[i] != Type.numerical
                        or self.types[j] != Type.numerical):
                    continue
                X = []
                Y = []
                start = 0  # first row of the current class
                for k in range(self.classify_num):
                    size = self.classes[k][1]
                    X.append(T[i][start:start + size])
                    Y.append(T[j][start:start + size])
                    start += size
                v = View(self, i, j, self.classify_id, self.classify_num,
                         X, Y, Chart.scatter)
                self.views.append(v)
                self.view_num += 1

    self.instance.view_num += self.view_num