Exemplo n.º 1
0
def createTbl(
    data,
    settings=None,
    _smote=False,
    isBin=False,
    bugThres=1,
    duplicate=False):
  """
  Pool the rows of every source in `data` into one model, cluster them with
  where2, and return a new table whose rows carry a cluster tag.

  args:
  data = iterable of sources; each item is handed to makeAModel.csv2py and
         the last one is also handed to table() as the header template
         (presumably CSV file names -- confirm against callers). Must
         contain at least one item.
  kwargs:
  settings = forwarded to prepare() (None keeps prepare's defaults)
  _smote = True/False : forwarded to csv2py (SMOTE input data or not)
  isBin = True/False : Reduce bugs to defects/no defects
  bugThres = int : Threshold for marking stuff as defective,
                   default = 1. Not defective => Bugs < 1
  duplicate = True/False : forwarded to csv2py

  returns: newTable(tbl, '=klass', rows), where every row is the cell list
           of a clustered row with '_<id of its leaf>' appended.
  raises: ValueError if `data` is empty.

  Note: the cell lists of the clustered rows are modified in place.
  """
  model_factory = makeAmodel.makeAModel()
  pooled_rows = []
  model = None
  last_source = None
  for last_source in data:
    model = model_factory.csv2py(
        last_source, _smote=_smote, duplicate=duplicate)
    pooled_rows += model._rows
  if model is None:
    # Without this guard the code below fails with an opaque NameError.
    raise ValueError("createTbl() needs at least one data source")

  # The model built from the last source is reused as the container for
  # the rows pooled from all sources.
  model._rows = pooled_rows
  # Initialize all parameters for where2 to run; the caller's settings are
  # passed through instead of being discarded.
  prepare(model, settings=settings)

  tree = where2(model, model._rows)  # Decision tree using where2
  tbl = table(last_source)  # header template comes from the last source

  headerLabel = '=klass'
  rows = []
  for leaf, _ in leaves(tree):
    tag = '_' + str(id(leaf))  # one tag per leaf, shared by all its rows
    for row in leaf.val:
      cells = row.cells
      if isBin:
        cells[-1] = 0 if cells[-1] < bugThres else 1
      cells.append(tag)
      rows.append(cells)

  return newTable(tbl, headerLabel, rows)
Exemplo n.º 2
0
def createTbl(data,
              settings=None,
              _smote=False,
              isBin=False,
              bugThres=1,
              duplicate=False):
    """
    Build one cluster-labelled table from all the sources in `data`.

    Each source is converted with makeAModel.csv2py, the resulting rows are
    merged into a single model, where2 clusters them, and every row found in
    a leaf of the resulting tree gets '_<id of the leaf>' appended to its
    cells.

    args:
    data = iterable of sources; every item goes to csv2py and the last one
           is also given to table() to provide the header (presumably CSV
           file names -- confirm against callers). Must not be empty.
    kwargs:
    settings = passed on to prepare() (None keeps prepare's defaults)
    _smote = True/False : passed on to csv2py (SMOTE input Data or not)
    isBin = True/False : Reduce bugs to defects/no defects
    bugThres = int : Threshold for marking stuff as defective,
                     default = 1. Not defective => Bugs < 1
    duplicate = True/False : passed on to csv2py

    returns: the result of newTable(tbl, '=klass', rows).
    raises: ValueError when `data` yields no sources.

    Note: row cell lists are updated in place (binarised class value and
    appended cluster tag).
    """
    converter = makeAmodel.makeAModel()
    merged_rows = []
    merged_model = None
    source = None
    for source in data:
        merged_model = converter.csv2py(
            source, _smote=_smote, duplicate=duplicate)
        merged_rows += merged_model._rows
    if merged_model is None:
        # An empty `data` used to surface as a NameError further down.
        raise ValueError("createTbl() needs at least one data source")

    # Reuse the model of the last source to hold the rows of all sources.
    merged_model._rows = merged_rows
    # Initialize all parameters for where2 to run. `settings` is forwarded
    # so that callers' overrides are no longer silently ignored.
    prepare(merged_model, settings=settings)

    tree = where2(merged_model, merged_model._rows)  # Decision tree
    tbl = table(source)  # `source` is the last item of `data` here

    headerLabel = '=klass'
    labelled = []
    for leaf, _ in leaves(tree):
        cluster_tag = '_' + str(id(leaf))
        for row in leaf.val:
            cells = row.cells
            if isBin:
                cells[-1] = 0 if cells[-1] < bugThres else 1
            cells.append(cluster_tag)
            labelled.append(cells)

    return newTable(tbl, headerLabel, labelled)
Exemplo n.º 3
0
 def csv2py(self, filename, _smote=False, duplicate=False):
     """Convert a csv file to a model file.

     The file is loaded with table(), self.str2num(tbl) is run on it
     (presumably to fill self.translate -- confirm), and the result is
     wrapped with self.data().

     Args:
       filename: source handed to table().
       _smote: accepted for interface compatibility; the SMOTE step is
         disabled, so the value is currently unused.
       duplicate: likewise unused while SMOTE is disabled.

     Returns:
       self.data(indep=..., less=..., _rows=...) where `indep` holds the
       names of the independent columns, `less` the names of the dependent
       columns, and `_rows` is a list with one list of cells per table
       row; string cells are replaced by self.translate[cell].
     """
     tbl = table(filename)
     self.str2num(tbl)

     def tonum(x):
         return self.translate[x] if isinstance(x, str) else x

     # table.py does not separate dependent from independent variables, so
     # drop every independent column whose name also occurs among the
     # dependent ones. The list is rebuilt in one pass (and updated in
     # place) because popping while enumerating skips the element that
     # follows each removal.
     depen_names = [col.name for col in tbl.depen]
     tbl.indep[:] = [col for col in tbl.indep
                     if col.name not in depen_names]

     # Build a real list: a bare map() is a one-shot iterator on Python 3.
     rows = [[tonum(cell) for cell in row.cells] for row in tbl._rows]
     return self.data(indep=[i.name for i in tbl.indep],
                      less=[i.name for i in tbl.depen],
                      _rows=rows)
Exemplo n.º 4
0
  def csv2py(self, filename, _smote=False, duplicate=False):
    """Convert a csv file to a model file.

    Loads `filename` with table(), calls self.str2num(tbl) (presumably
    this populates self.translate -- confirm), and returns the table
    contents wrapped by self.data().

    Args:
      filename: source handed to table().
      _smote: kept for interface compatibility; unused because the SMOTE
        step is disabled.
      duplicate: kept for interface compatibility; unused for the same
        reason.

    Returns:
      The value of self.data(indep=..., less=..., _rows=...): `indep` is
      the list of independent column names, `less` the list of dependent
      column names, and `_rows` a list of cell lists in which every string
      cell has been replaced by self.translate[cell].
    """
    tbl = table(filename)
    self.str2num(tbl)

    def tonum(x):
      return self.translate[x] if isinstance(x, str) else x

    # Work around table.py listing dependent columns among the independent
    # ones as well: keep only the independent columns whose name is not a
    # dependent name. Filtering into the same list object replaces the old
    # pop-while-enumerating loop, which skipped the column following each
    # removed one.
    dependent_names = [col.name for col in tbl.depen]
    tbl.indep[:] = [col for col in tbl.indep
                    if col.name not in dependent_names]

    # A list (not map()) so the rows can be traversed more than once on
    # Python 3 as well.
    converted = [[tonum(cell) for cell in row.cells] for row in tbl._rows]
    return self.data(indep=[i.name for i in tbl.indep],
                     less=[i.name for i in tbl.depen],
                     _rows=converted)