Exemplo n.º 1
0
def createTbl(data,
              settings=None,
              _smote=False,
              isBin=False,
              bugThres=2,
              duplicate=False):
    """
    Build a table whose rows carry the label of the where2 leaf they fell in.

    Every entry of `data` is loaded with `makeAModel().csv2py`; the rows of
    all entries are pooled into the model loaded last, `where2` grows a tree
    over the pooled rows, and each row gets the label of its leaf appended
    as an extra '=klass' column.

    Args:
      data: iterable of data sets accepted by `csv2py` (presumably CSV file
        paths -- TODO confirm against callers). Must not be empty.
      settings: forwarded to `prepare` (None keeps the previous behaviour).
      _smote: True/False, forwarded to `csv2py` (SMOTE the input or not).
      isBin: when True, the last cell of every row is replaced by 0 if it
        is below `bugThres` and by 1 otherwise.
      bugThres: threshold used when `isBin` is True; default 2, i.e. a row
        counts as non-defective when its last cell is < 2.
      duplicate: forwarded to `csv2py`.

    Returns:
      The result of `newTable(table(<last data set>), '=klass', rows)`.

    Raises:
      ValueError: if `data` is empty.
    """
    datasets = list(data)
    if not datasets:
        # Without this guard the code below fails with an UnboundLocalError
        # on `m`, which says nothing about the actual problem.
        raise ValueError('createTbl: `data` must contain at least one data set')

    makeaModel = makeAmodel.makeAModel()
    pooled = []
    for t in datasets:
        m = makeaModel.csv2py(t, _smote=_smote, duplicate=duplicate)
        pooled += m._rows
    m._rows = pooled  # the last model now carries the rows of every data set
    # The caller's settings used to be dropped here (settings=None was
    # hard-coded); they are now forwarded.
    prepare(m, settings=settings)  # Initialize all parameters for where2
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)  # built from the last data set only

    Rows = []
    for leaf, _ in leaves(tree):
        # NOTE(review): labels come from id(), so they differ between runs
        # and two leaves can in principle share the same last three digits.
        label = '_' + str(id(leaf) % 1000)
        for row in leaf.val:
            cells = row.cells  # same list object as row.cells: edits are in place
            if isBin:
                cells[-1] = 0 if cells[-1] < bugThres else 1
            cells.append(label)
            Rows.append(cells)

    return newTable(tbl, '=klass', Rows)
Exemplo n.º 2
0
def createTbl(
        data, settings=None, _smote=False, isBin=False, bugThres=2, duplicate=False):
  """
  Pool several data sets, cluster them with where2 and label every row.

  Each entry of `data` is read through `makeAModel().csv2py`. The rows of
  all entries are collected into the model read last, a where2 tree is
  grown over them, and the label of the containing leaf is appended to each
  row as a new '=klass' column.

  kwargs:
  settings = forwarded to `prepare`; None keeps the previous behaviour
  _smote = True/False : SMOTE input data (or not), forwarded to csv2py
  isBin = True/False : Reduce the last cell of each row to 0/1
  bugThres = int : Threshold used when isBin is True, default = 2.
                   Last cell < bugThres => 0, otherwise 1
  duplicate = forwarded to csv2py

  Returns the result of `newTable(table(<last data set>), '=klass', rows)`.
  Raises ValueError when `data` is empty.
  """
  sources = list(data)
  if not sources:
    # The loop below would leave `m` and `t` unbound and crash with an
    # UnboundLocalError; fail early with a message that names the cause.
    raise ValueError('createTbl: `data` must contain at least one data set')

  makeaModel = makeAmodel.makeAModel()
  allRows = []
  for t in sources:
    m = makeaModel.csv2py(t, _smote=_smote, duplicate=duplicate)
    allRows += m._rows
  m._rows = allRows  # last model holds the rows of every data set
  # `settings` was previously accepted but never used (None was hard-coded).
  prepare(m, settings=settings)  # Initialize all parameters for where2 to run
  tree = where2(m, m._rows)  # Decision tree using where2
  tbl = table(t)  # built from the last data set only

  headerLabel = '=klass'
  Rows = []
  for k, _ in leaves(tree):
    # NOTE(review): id()-based labels change between runs and may collide.
    tag = '_' + str(id(k) % 1000)
    for j in k.val:
      if isBin:
        j.cells[-1] = 0 if j.cells[-1] < bugThres else 1
      j.cells.append(tag)  # in-place: the row keeps the extra column
      Rows.append(j.cells)

  return newTable(tbl, headerLabel, Rows)
Exemplo n.º 3
0
def tdivPrec(where=None, dtree=None, train=None, test=None):
    rseed(1)
    makeaModel = makeAModel()

    # pdb.set_trace()
    """
 Training
 """
    _r = []
    for t in train:
        m = makeaModel.csv2py(t)
        _r += m._rows
    m._rows = _r
    prepare(m, settings=where)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):  # for k, _ in leaves(tree):
        for j in k.val:
            tmp = (j.cells)
            tmp.append('_' + str(id(k) % 1000))
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    tbl2 = newTable(tbl, headerLabel, Rows)
    """
 Testing
 """
    _r = []
    for tt in test:
        mTst = makeaModel.csv2py(tt)
        _r += mTst._rows
    mTst._rows = _r
    prepare(mTst,
            settings=where)  # Initialize all parameters for where2 to run
    tree = where2(mTst, mTst._rows)  # Decision tree using where2
    tbl = table(tt)
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):  # for k, _ in leaves(tree):
        for j in k.val:
            tmp = (j.cells)
            tmp.append('_' + str(id(k) % 1000))
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    tbl3 = newTable(tbl, headerLabel, Rows)
    temp = []

    def sort(lst):
        return [i[0] for i in sorted(enumerate(lst), key = lambda x:x[1])], \
               [i[1] for i in sorted(enumerate(lst), key = lambda x:x[1])]

    def thresh(val1, val2):
        indx, sorted = sort()

    def isdefective(case, test=False):
Exemplo n.º 4
0
def createDF(data):
    """
    Return a pandas DataFrame of all rows in `data`, labelled by where2 leaf.

    Every entry of `data` is loaded with `makeAModel().csv2py`, the rows are
    pooled into the model loaded last, `where2` grows a tree over them, and
    each row gets 'Class_<n>' appended, where <n> identifies the leaf that
    contains the row.

    Args:
      data: iterable of data sets accepted by `csv2py` (presumably CSV file
        paths -- TODO confirm against callers). Must not be empty.

    Returns:
      pd.DataFrame with columns `get_headers(data) + ['klass']`.

    Raises:
      ValueError: if `data` is empty.
    """
    datasets = list(data)
    if not datasets:
        # Otherwise `m` is never bound and the code dies with an
        # UnboundLocalError that hides the real cause.
        raise ValueError('createDF: `data` must contain at least one data set')

    makeaModel = makeAModel()
    pooled = []
    for t in datasets:
        m = makeaModel.csv2py(t)
        pooled += m._rows
    m._rows = pooled  # the last model now carries the rows of every data set
    prepare(m)
    tree = where2(m, m._rows)

    Rows = []
    for leaf, _ in leaves(tree):
        # NOTE(review): labels come from id(), so they differ between runs
        # and two leaves can in principle share the same last three digits.
        label = 'Class_' + str(id(leaf) % 1000)
        for row in leaf.val:
            row.cells.append(label)  # in place: the row keeps the label
            Rows.append(row.cells)
    return pd.DataFrame(Rows, columns=get_headers(data) + ['klass'])
Exemplo n.º 5
0
def createDF(data):
  """
  Cluster the pooled rows of `data` with where2 and return them as a frame.

  Each entry of `data` is read through `makeAModel().csv2py`; all rows are
  gathered in the model read last and handed to `where2`. Every row then
  receives a trailing 'Class_<n>' cell naming the leaf it belongs to.

  data : iterable of data sets understood by csv2py (presumably CSV file
         paths -- TODO confirm); must not be empty.

  Returns a pd.DataFrame whose columns are `get_headers(data) + ['klass']`.
  Raises ValueError when `data` is empty.
  """
  sources = list(data)
  if not sources:
    # An empty input used to surface as an UnboundLocalError on `m`.
    raise ValueError('createDF: `data` must contain at least one data set')

  makeaModel = makeAModel()
  allRows = []
  for t in sources:
    m = makeaModel.csv2py(t)
    allRows += m._rows
  m._rows = allRows  # last model holds the rows of every data set
  prepare(m)
  tree = where2(m, m._rows)

  Rows = []
  for k, _ in leaves(tree):
    # NOTE(review): id()-based labels change between runs and may collide.
    tag = 'Class_' + str(id(k) % 1000)
    for j in k.val:
      j.cells.append(tag)  # mutates the row in place
      Rows.append(j.cells)
  return pd.DataFrame(Rows, columns=get_headers(data) + ['klass'])
Exemplo n.º 6
0
def createTbl(data):
    """
    Build a table whose rows carry the label of the where2 leaf they fell in.

    Every entry of `data` is loaded with `makeAModel().csv2py`; the rows of
    all entries are pooled into the model loaded last, `where2` grows a tree
    over the pooled rows, and each row gets the label of its leaf appended
    as an extra '=klass' column.

    Args:
      data: iterable of data sets accepted by `csv2py` (presumably CSV file
        paths -- TODO confirm against callers). Must not be empty.

    Returns:
      The result of `newTable(table(<last data set>), '=klass', rows)`.

    Raises:
      ValueError: if `data` is empty.
    """
    datasets = list(data)
    if not datasets:
        # Without this guard the code below fails with an UnboundLocalError
        # on `m`, which says nothing about the actual problem.
        raise ValueError('createTbl: `data` must contain at least one data set')

    makeaModel = makeAModel()
    pooled = []
    for t in datasets:
        m = makeaModel.csv2py(t)
        pooled += m._rows
    m._rows = pooled  # the last model now carries the rows of every data set
    prepare(m, settings=None)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)  # built from the last data set only

    Rows = []
    for leaf, _ in leaves(tree):
        # NOTE(review): labels come from id(), so they differ between runs
        # and two leaves can in principle share the same last three digits.
        label = '_' + str(id(leaf) % 1000)
        for row in leaf.val:
            row.cells.append(label)  # in place: the row keeps the label
            Rows.append(row.cells)
    return newTable(tbl, '=klass', Rows)
Exemplo n.º 7
0
def createTbl(data):
    """
    Pool several data sets, cluster them with where2 and label every row.

    Each entry of `data` is read through `makeAModel().csv2py`. The rows of
    all entries are collected into the model read last, a where2 tree is
    grown over them, and the label of the containing leaf is appended to
    each row as a new '=klass' column.

    data : iterable of data sets understood by csv2py (presumably CSV file
           paths -- TODO confirm); must not be empty.

    Returns the result of `newTable(table(<last data set>), '=klass', rows)`.
    Raises ValueError when `data` is empty.
    """
    sources = list(data)
    if not sources:
        # An empty input used to surface as an UnboundLocalError on `m`.
        raise ValueError('createTbl: `data` must contain at least one data set')

    makeaModel = makeAModel()
    allRows = []
    for t in sources:
        m = makeaModel.csv2py(t)
        allRows += m._rows
    m._rows = allRows  # last model holds the rows of every data set
    prepare(m, settings=None)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)  # built from the last data set only

    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):
        # NOTE(review): id()-based labels change between runs and may collide.
        tag = '_' + str(id(k) % 1000)
        for j in k.val:
            j.cells.append(tag)  # mutates the row in place
            Rows.append(j.cells)
    return newTable(tbl, headerLabel, Rows)
Exemplo n.º 8
0
def tdivPrec(where=None, dtree=None, train=None, test=None):
    """
    Label test rows as 'Defect' / 'No Defect', directly and via a tdiv tree.

    Both `train` and `test` are turned into cluster-labelled tables: the
    data sets are loaded with `csv2py`, pooled, clustered with `where2`, and
    every row gets the label of its leaf appended as an '=klass' column. A
    tdiv tree is then built from the discretised training table, and every
    test row is dropped down that tree.

    Args:
      where: settings forwarded to `prepare` for both tables.
      dtree: options forwarded to `tdiv` as `opt`.
      train: iterable of training data sets accepted by `csv2py`
        (presumably CSV file paths -- TODO confirm). Must not be empty.
      test: iterable of test data sets, same form. Must not be empty.

    Returns:
      [testDefective, defectivClust], two parallel lists with one entry per
      test row:
        testDefective -- 'Defect' when the row's second-to-last cell is > 0.
        defectivClust -- 'Defect' when the mean of the second-to-last cells
          of the rows in the tdiv node the test row landed in is > 1.5.

    Raises:
      TypeError: if `train` or `test` is not iterable (e.g. left as None).
      ValueError: if `train` or `test` is empty.

    Side effects:
      Calls `rseed(1)` and `saveImg(<cluster means>, 10)`.
    """
    trainSets, testSets = list(train), list(test)
    if not trainSets or not testSets:
        # Without this guard an empty input fails later with an
        # UnboundLocalError that hides the real cause.
        raise ValueError(
            'tdivPrec: `train` and `test` must each contain at least one data set')

    rseed(1)
    makeaModel = makeAModel()

    def clusterTable(datasets):
        """Pool `datasets`, grow a where2 tree and label each row by leaf."""
        pooled = []
        for t in datasets:
            m = makeaModel.csv2py(t)
            pooled += m._rows
        m._rows = pooled  # last model carries the rows of every data set
        prepare(m, settings=where)  # Initialize all parameters for where2
        tree = where2(m, m._rows)  # Decision tree using where2
        tbl = table(t)  # built from the last data set only
        Rows = []
        for leaf, _ in leaves(tree):
            # NOTE(review): id()-based labels change between runs and two
            # leaves can in principle share the same last three digits.
            label = '_' + str(id(leaf) % 1000)
            for row in leaf.val:
                row.cells.append(label)  # in place: the row keeps the label
                Rows.append(row.cells)
        return newTable(tbl, '=klass', Rows)

    # Training table first, then testing, so the order of calls after
    # rseed(1) is the same as before the two copies were merged.
    tbl2 = clusterTable(trainSets)
    tbl3 = clusterTable(testSets)

    temp = []  # mean of cells[-2] for every tdiv node a test row landed in

    def isdefective(case, test=False):
        """Classify one row (test=False) or one tdiv node (test=True)."""
        if not test:
            # cells[-1] is the appended cluster label, so cells[-2] is the
            # last original column (presumably the bug count -- confirm).
            return 'Defect' if case.cells[-2] > 0 else 'No Defect'
        bugs = [r.cells[-2] for r in case.rows]
        meanBugs = np.mean(bugs)
        temp.append(meanBugs)
        return 'Defect' if meanBugs > 1.5 else 'No Defect'

    # A real list (not a lazy map object) so discreteNums can traverse the
    # rows more than once on Python 3 as well.
    t = discreteNums(tbl2, [r.cells for r in tbl2._rows])
    myTree = tdiv(t, opt=dtree)

    testDefective = []
    defectivClust = []
    for tC in tbl3._rows:
        loc = drop(tC, myTree)
        testDefective.append(isdefective(tC))
        defectivClust.append(isdefective(loc, test=True))

    saveImg(temp, 10)
    return [testDefective, defectivClust]