Exemplo n.º 1
0
 def diffkeys_explore(self, key1, key2, dump_sample=0):
     '''Return a list of difference observations comparing two fields.

     The two fields are fetched with ``self.fields(key1, key2)`` and
     compared element-wise.  Every observation is a human-readable
     string; the list is empty when no element differs.

     Arguments:
       key1, key2  -- keys of the two fields to compare
       dump_sample -- when nonzero, also append the text form of a table
                      holding the index and both values of each differing
                      row; the table is resized to ``dump_sample`` rows
                      when it holds more than that

     Observations, in order and only when applicable: values agree under
     ``na.allclose``; count and percentage of differing values; differing
     type codes; values matching after conversion to 'Int' or 'Bool';
     differing values related by a constant factor; mean and range of
     the differences.  When either type code starts with 'a' only the
     count and type-code observations can be produced.
     '''
     out = []
     vals = self.fields(key1, key2)
     notequal = vals[0] != vals[1]
     diffcnt = notequal.sum()
     if diffcnt == 0:
         # nothing differs, so there is nothing to report
         return out
     tcodes = [get_format(val) for val in vals]
     # a type code starting with 'a' marks an alpha field; evaluate the
     # check once since it gates both the allclose() test and the
     # detailed numeric analysis
     has_alpha = 'a' in [tcode[0] for tcode in tcodes]
     # are all values within floating point fuzz?
     # NOTE(review): reporting continues after this observation, so the
     # output may hold both this line and the 'values differ' line --
     # confirm whether an early return was intended here.
     if not has_alpha and na.allclose(*vals):
         out.append('no differences using allclose()')
     # real differences exist
     total = len(self)
     out.append('%s values differ (%1.2f%% of %s)' %
                (diffcnt, 100.0 * diffcnt / total, total))
     if tcodes[0] != tcodes[1]:
         out.append('field types differ %s' % str(tuple(tcodes)))
     # skip detail if any field is alpha type
     if has_alpha:
         return out
     # differences as different types?
     for typestr in ('Int', 'Bool'):
         tvals = [v.astype(typestr) for v in vals]
         if na.allclose(*tvals):
             out.append('field values match as type(%r)' % typestr)
     # extract differences and examine in greater detail
     dvals = [any_compress(notequal, val) for val in vals]
     nzmask = [(dval != 0) for dval in dvals]
     if (nzmask[0] != nzmask[1]).sum() == 0:
         # zeros line up in both fields, so the ratio below never divides
         # by zero; compare the nonzero values
         nzvals = [any_compress(nzmask[0], dval) for dval in dvals]
         ratio = nzvals[1].astype('Float') / nzvals[0].astype('Float')
         factor = ratio.mean()
         if na.allclose(ratio, factor):
             out.append('field values differ by constant factor: %f' %
                        factor)
     delta = dvals[0] - dvals[1]
     dmin, dmax = delta.min(), delta.max()
     out.append('difference mean=%f range=%f (%s to %s)' %
                (delta.mean(), dmax - dmin, dmin, dmax))
     if dump_sample:
         # build a small table of the differing rows for display
         tmp = UniTable()
         tmp['_idx_'] = na.nonzero(notequal)[0]
         for key in (key1, key2):
             tmp[key] = self.field(key)[notequal]
         if len(tmp) > dump_sample:
             tmp.resize(dump_sample)
         out.extend(str(tmp).split('\n'))
     return out
Exemplo n.º 2
0
 def diffkeys_explore(self,key1,key2,dump_sample=0):
   '''Return a list of difference observations comparing two fields.

   The fields named by key1 and key2 are fetched with self.fields() and
   compared element-wise.  Each observation is a descriptive string; an
   empty list means no element differs.

   Arguments:
     key1, key2  -- keys of the two fields to compare
     dump_sample -- if nonzero, the text form of a table listing the
                    index and both values of the differing rows is
                    appended; that table is resized to dump_sample rows
                    when it holds more

   The detailed numeric observations (type-cast match, constant factor,
   difference mean/range) are skipped when either field's type code
   starts with 'a'.
   '''
   out = []
   vals = self.fields(key1,key2)
   notequal = vals[0] != vals[1]
   diffcnt = notequal.sum()
   if diffcnt == 0:
     return out
   tcodes = [get_format(val) for val in vals]
   tchars = [tcode[0] for tcode in tcodes]
   # an 'a' type code marks an alpha field; the answer is needed twice
   # below, so work it out once
   any_alpha = bool(tchars.count('a'))
   # are all values within floating point fuzz?
   # NOTE(review): processing carries on after this observation, so it
   # can appear alongside the 'values differ' line -- confirm that this
   # is intended rather than a missing early return.
   if not any_alpha and na.allclose(*vals):
     out.append('no differences using allclose()')
   # real differences exist
   out.append('%s values differ (%1.2f%% of %s)' % (
     diffcnt,100.0*diffcnt/len(self),len(self)))
   if tcodes[0] != tcodes[1]:
     out.append('field types differ %s' % str(tuple(tcodes)))
   # skip detail if any field is alpha type
   if any_alpha:
     return out
   # differences as different types?
   for typestr in ('Int','Bool'):
     tvals = [v.astype(typestr) for v in vals]
     if na.allclose(*tvals):
       out.append('field values match as type(%r)' % typestr)
   # extract differences and examine in greater detail
   dvals = [any_compress(notequal,val) for val in vals]
   nzmask = [(dval != 0) for dval in dvals]
   if (nzmask[0] != nzmask[1]).sum() == 0:
     # all zeros match, compare the nonzero values (the divisor below
     # is therefore never zero)
     nzvals = [any_compress(nzmask[0],dval) for dval in dvals]
     ratio = nzvals[1].astype('Float') / nzvals[0].astype('Float')
     factor = ratio.mean()
     if na.allclose(ratio,factor):
       out.append('field values differ by constant factor: %f' % factor)
   delta = dvals[0] - dvals[1]
   dmin,dmax = delta.min(),delta.max()
   out.append('difference mean=%f range=%f (%s to %s)' % (
       delta.mean(),dmax-dmin,dmin,dmax))
   if dump_sample:
     # table of differing rows: row index plus the value from each field
     tmp = UniTable()
     tmp['_idx_'] = na.nonzero(notequal)[0]
     for key in (key1,key2):
       tmp[key] = self.field(key)[notequal]
     if len(tmp) > dump_sample:
       tmp.resize(dump_sample)
     out.extend(str(tmp).split('\n'))
   return out
Exemplo n.º 3
0
 def compress(self,mask):
   '''Return a new table keeping only the rows where mask is nonzero.

   The result is created with self._new_hook(); every key in self._keys
   is filled with any_compress(mask, column) for the matching column of
   self._data, and the result's _postinit_hook() is called before it is
   returned.
   '''
   result = self._new_hook()
   for name in self._keys:
     column = self._data[name]
     result[name] = any_compress(mask,column)
   result._postinit_hook()
   return result
Exemplo n.º 4
0
 def compress(self, mask):
     '''Build a new table from the rows selected by a nonzero mask.

     Arguments:
       mask -- passed to any_compress() together with each column

     Returns the object produced by self._new_hook(), holding one
     compressed column per key of self._keys, after its _postinit_hook()
     has been run.
     '''
     table = self._new_hook()
     # copy the columns across in key order, compressing each in turn
     for field in self._keys:
         source = self._data[field]
         table[field] = any_compress(mask, source)
     # let the new table finish its own setup now that all columns exist
     table._postinit_hook()
     return table