Exemplo n.º 1
    def __init__(self, filename, target, inputs=None, threshold=1.0e-9):
        """Build the model from a data table and record how well it fits.

        filename  -- path of a file to load, or an already loaded UniTable
        target    -- forwarded to model_build
        inputs    -- forwarded to model_build (default None)
        threshold -- forwarded to CountsBase.__init__

        After construction, verify_result holds three columns: 'orig'
        (data[self.target]), 'pred' (the result of model_predict on the
        same data) and 'agree' (their comparison). accuracy is the sum of
        'agree' divided by the number of rows in the data.
        """
        CountsBase.__init__(self, threshold=threshold)
        if isinstance(filename, UniTable):
            # The caller handed over a table directly; use it as is.
            data = filename
        else:
            # Anything else is treated as something fromfile() can load.
            data = UniTable().fromfile(filename)
        self.model_build(data, target, inputs)

        self.verify_result = verify = UniTable()
        verify['orig'] = data[self.target]
        verify['pred'] = self.model_predict(data)
        verify['agree'] = verify['orig'] == verify['pred']
        self.accuracy = float(verify['agree'].sum()) / len(data)
Exemplo n.º 2
 def __str__(self):
     """Return the string form of a table of keys, names, values and rules."""
     table = UniTable()
     # Column label paired with the accessor that supplies its contents;
     # the accessors are called in this order.
     columns = (
         ('key', self.keys),
         ('name', self.names),
         ('expr', self.values),
         ('rule', self.rules),
     )
     for label, getter in columns:
         table[label] = getter()
     return str(table)
Exemplo n.º 3
 def __str__(self):
   """Return the string form of the matrix laid out with totals.

   The first column lists the row labels followed by '_totals_'; each
   matrix column gets its column sum appended; a final '_totals_'
   column holds the row sums followed by the overall sum.
   """
   table = UniTable()
   table['(#)'] = list(self.rows) + ['_totals_']
   column_totals = self.col_sums()
   for idx, name in enumerate(self.cols):
     values = list(self.matrix[:, idx])
     values.append(column_totals[idx])
     table[name] = values
   totals_column = list(self.row_sums())
   totals_column.append(self.sum())
   table['_totals_'] = totals_column
   return str(table)
Exemplo n.º 4
 def handle_select(self, opt, tbl):
     """Return a new table holding the fields selected by opt.select.

     The select expressions are turned into rules, the rules are applied
     to tbl through an EvalTable, and each resulting column is copied
     into the output under the expression's name.
     """
     expressions = FieldExprList(*opt.select)
     evaluated = EvalTable(expressions.rules()).update(tbl)
     selected = UniTable()
     keys = expressions.keys()
     names = expressions.names()
     for key, name in zip(keys, names):
         selected[name] = evaluated[key]
     return selected
Exemplo n.º 5
 def export(self):
     """Return the matrix as a UniTable extended with row and column totals.

     Column '(#)' carries the row names plus a trailing '_totals_' label,
     every named column carries its values plus its column sum, and the
     '_totals_' column carries the row sums plus the overall sum.
     """
     totals_label = '_totals_'
     result = UniTable()
     result['(#)'] = list(self.rownames) + [totals_label]
     sums_by_column = self.col_sums()
     for position, column_name in enumerate(self.colnames):
         cells = list(self.matrix[:, position])
         cells.append(sums_by_column[position])
         result[column_name] = cells
     last_column = list(self.row_sums())
     last_column.append(self.sum())
     result[totals_label] = last_column
     return result
Exemplo n.º 6
 def __init__(self,filename=None,keys=None):
   """Set up the value-to-index mapping, loading it from filename if present.

   filename -- optional path; when it names an existing file, its
               'index' and 'data' columns are read and stored as
               data[value] = index
   keys     -- optional list stored on the instance as self.keys;
               defaults to a new empty list
   """
   # Create the default list per call: a literal [] in the signature is
   # evaluated once and would be shared by every instance using it.
   self.keys = [] if keys is None else keys
   self.data = data = {'':0}  # try to pre-assign empty string value
   self.filename = filename
   if filename and os.path.exists(filename):
     from augustus.unitable import UniTable
     tbl = UniTable().fromfile(filename)
     for i,value in it.izip(tbl['index'],tbl['data']):
       data[value] = i
     del tbl
Exemplo n.º 7
 def __call__(self, data):
   """Score data and return the last entry of the resulting 'score' column.

   Every intermediate quantity is stored as a column of a fresh
   UniTable, which is also kept on self._state: the data, both model
   outputs, their ratio ('odds'), its logarithm, the values produced by
   gen_cusum with self.reset_value, and the comparison of those values
   against self.threshold.
   """
   table = UniTable()
   self._state = table
   table['data'] = data
   # Values are read back from the table rather than reused directly,
   # so each step sees the column as the table stores it.
   table['nullmodel'] = self.nullmodel(table['data'])
   table['altmodel'] = self.altmodel(table['data'])
   table['odds'] = table['altmodel'] / table['nullmodel']
   table['log_odds'] = na.log(table['odds'])
   cusum_values = gen_cusum(table['log_odds'], self.reset_value)
   table['cusum'] = list(cusum_values)
   table['score'] = table['cusum'] > self.threshold
   scores = table['score']
   return scores[-1]
Exemplo n.º 8
 def _make_tbl(self, cfunc, ccfunc):
     """Return the string form of a table with one column per target value.

     The '__fld__' and '__val__' columns hold the first and second
     element of every item yielded by iter_ikv(), preceded by an empty
     string. For each value from all_tval() a column named str(tval) is
     added: cfunc(tval) first, then ccfunc(tval, ikey, ival) for every
     (ikey, ival) item.
     """
     table = UniTable()
     pairs = list(self.iter_ikv())

     field_column = ['']
     field_column.extend(pair[0] for pair in pairs)
     table['__fld__'] = field_column

     value_column = ['']
     value_column.extend(pair[1] for pair in pairs)
     table['__val__'] = value_column

     for tval in self.all_tval():
         column = [cfunc(tval)]
         column.extend(ccfunc(tval, ikey, ival) for (ikey, ival) in pairs)
         table[str(tval)] = column
     return str(table)
Exemplo n.º 9
 def flush(self):
   """Build an 'index'/'data' table from self.data when there is something to flush.

   Nothing happens unless self.filename is set and self.data holds more
   than one entry.
   """
   if not self.filename or len(self.data) <= 1:
     return
   from augustus.unitable import UniTable
   tbl = UniTable(keys=['index','data'])
   pairs = self.data.items()
   tbl['index'] = [index for (_, index) in pairs]
   tbl['data'] = [value for (value, _) in pairs]
   # NOTE(review): the table is discarded here without being written to
   # self.filename -- presumably a write call is missing; confirm
   # against the original source.
   del pairs
   del tbl
Exemplo n.º 10
def top_ten(filenames):
    """Print the most frequent values (up to ten) of every field in the files.

    Each file is loaded into a UniTable. When a table has a '_count_'
    column, its entries weight the records and their sum is added to the
    record total; otherwise every record counts once. For each field the
    values with the highest counts are printed together with their share
    of the total record count.

    filenames -- iterable of files readable by UniTable.fromfile
    """
    # track values for each field
    seen_fields = {}
    total_recs = 0

    # read each file in turn
    for filename in filenames:
        tbl = UniTable().fromfile(filename)

        # snapshot the field names before a synthetic '_count_' is added
        keys = list(tbl.keys())
        if '_count_' in keys:
            total_recs += tbl['_count_'].sum()
        else:
            # no weights supplied: every record counts once
            total_recs += len(tbl)
            tbl['_count_'] = 1

        # read each column in turn
        for key in keys:
            seen_values = seen_fields.setdefault(key, {})

            # iterate over counts and values
            for cnt, value in izip(tbl['_count_'], tbl[key]):
                seen_values[value] = seen_values.get(value, 0) + cnt

    # report results
    for key, seen_values in seen_fields.items():
        if not seen_values:
            # field only ever appeared in empty tables; nothing to rank
            continue

        # find top ten: the tenth-highest count is the cutoff, which keeps
        # the list that has to be sorted by (count, value) short
        top_cnts = sorted(seen_values.values())
        cutoff = top_cnts[-10:][0]
        tmp = sorted([cnt, value] for (value, cnt) in seen_values.items()
                     if cnt >= cutoff)
        top = reversed(tmp[-10:])

        # report (single-argument print works as statement and function)
        print('Field: %s' % (key,))
        for (cnt, value) in top:
            percent = 100.0 * cnt / float(total_recs)
            print('\t(%8.5f%%) %r' % (percent, value))
Exemplo n.º 11
 def handle_arg(self, opt, arg):
     """Load the file given by arg, process it and print the result as CSV.

     The loaded table first goes through handle_select when opt.select
     is set, then through handle_counttable; the exported result is
     printed via to_csv_str().
     """
     loaded = UniTable().fromfile(arg)
     source = self.handle_select(opt, loaded) if opt.select else loaded
     counted = self.handle_counttable(opt, arg, source)
     print(counted.export().to_csv_str())
Exemplo n.º 12
 def __init__(self,nullmodel,altmodel,threshold,reset_value=0.0):
   """Store the two models, the threshold and the reset value.

   The instance starts with an empty UniTable as its _state.
   """
   self.nullmodel, self.altmodel = nullmodel, altmodel
   self.threshold, self.reset_value = threshold, reset_value
   self._state = UniTable()