Exemplo n.º 1
0
def ReadQuantExamples(inFile):
  """ reads the examples from a .qdat file

    **Arguments**

      - inFile: a file object (anything with a text-mode _readline()_)

    **Returns**

      a 2-tuple containing:

        1) the names of the examples

        2) a list of lists containing the examples themselves

    **Notes**

      - lines starting with '#' are comments and are skipped

      - lines with fewer than two whitespace-separated fields are skipped

      - because this is reading a .qdat file, it is assumed that all variable
        values are integers; anything else makes int() raise ValueError

  """
  examples = []
  names = []
  inLine = inFile.readline()
  while inLine:
    if not inLine.startswith('#'):
      # str.split() breaks the line on runs of whitespace.  The previous
      # re.split(r'[\ ]*|[\t]*') pattern can match the empty string, and
      # Python >= 3.7 splits on empty matches too, which chopped every line
      # into single characters.
      resArr = inLine.split()
      if len(resArr) > 1:
        examples.append([int(x) for x in resArr[1:]])
        names.append(resArr[0])
    inLine = inFile.readline()
  return names, examples
Exemplo n.º 2
0
def ReadQuantExamples(inFile):
    """ reads the examples from a .qdat file

    **Arguments**

      - inFile: a file object (anything with a text-mode _readline()_)

    **Returns**

      a 2-tuple containing:

        1) the names of the examples

        2) a list of lists containing the examples themselves

    **Notes**

      - lines starting with '#' are comments and are skipped

      - lines with fewer than two whitespace-separated fields are skipped

      - because this is reading a .qdat file, it is assumed that all variable
        values are integers; anything else makes int() raise ValueError

    """
    examples = []
    names = []
    inLine = inFile.readline()
    while inLine:
        if not inLine.startswith('#'):
            # Split on runs of whitespace with str.split().  The old
            # re.split(r'[\ ]*|[\t]*') pattern is able to match the empty
            # string; since Python 3.7 re.split() also splits on empty
            # matches, so each line was cut into single characters.
            resArr = inLine.split()
            if len(resArr) > 1:
                examples.append([int(x) for x in resArr[1:]])
                names.append(resArr[0])
        inLine = inFile.readline()
    return names, examples
Exemplo n.º 3
0
 def testMultSplit3(self):
   """  4 possible results
   """
   # (variable value, result code) pairs
   d = [(1., 0), (1.1, 0), (1.2, 0),
        (1.4, 2), (1.4, 2), (1.6, 2), (2., 2),
        (2.1, 1), (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1),
        (3.0, 3), (3.1, 3), (3.2, 3), (3.3, 3)]
   varValues, resCodes = zip(*d)
   nPossibleRes = 4
   res = Quantize.FindVarMultQuantBounds(varValues, 3, resCodes, nPossibleRes)
   # target is (expected bounds, expected gain), compared with a 1e-4 tolerance
   target = ([1.30, 2.05, 2.65], 1.97722)
   # TestCase assertion helpers are used instead of bare asserts, which are
   # stripped when Python runs with -O and would make the test pass vacuously
   self.assertTrue(
     Quantize.feq(res[1], target[1], 1e-4),
     'InfoGain comparison failed: %s != %s' % (res[1], target[1]))
   self.assertEqual(
     min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1,
     'split bound comparison failed: %s != %s' % (res[0], target[0]))
Exemplo n.º 4
0
def CalcNPossibleUsingMap(data, order, qBounds, nQBounds=None):
    """ calculates the number of possible values for each variable in a data set

    **Arguments**

      - data: a list of examples

      - order: the ordering map between the variables in _data_ and _qBounds_

      - qBounds: the quantization bounds for the variables

      - nQBounds: (optional) per-variable values; a variable whose entry is
        nonzero is not scanned and is reported as 0

    **Returns**

      a list with the number of possible values each variable takes on in the data set

    **Notes**

      - variables present in _qBounds_ will have their _nPossible_ number read
        from _qBounds_ (len(bounds) + 1)

      - _nPossible_ for other numeric variables will be calculated as the
        largest integral value found in the data plus one

      - a variable holding a non-numeric or non-integral value is reported as 0

      - the 'order:' / 'qB:' / 'bye bye col' diagnostics are printed to stdout

    """
    numericTypes = [int, float]
    try:
        # Python 2 only: `long` is a distinct integer type there
        numericTypes.append(long)  # noqa: F821
    except NameError:
        pass

    print('order:', order, len(order))
    print('qB:', qBounds)
    assert ((qBounds and len(order) == len(qBounds)) or
            (nQBounds and len(order) == len(nQBounds))), 'order/qBounds mismatch'
    nVars = len(order)
    nPossible = [-1] * nVars
    # cols holds the variables that still have to be scanned in the data; it
    # must be a real list because a Python 3 range object has no remove()
    cols = list(range(nVars))
    for i in range(nVars):
        if nQBounds and nQBounds[i] != 0:
            nPossible[i] = -1
            cols.remove(i)
        elif qBounds and len(qBounds[i]) > 0:
            # guard on qBounds: it may be None/empty when only nQBounds is given
            nPossible[i] = len(qBounds[i])
            cols.remove(i)

    for example in data:
        # iterate over a copy: columns are dropped from cols while scanning
        for col in cols[:]:
            d = example[order[col]]
            # exact type match on purpose (e.g. bool is not treated as numeric)
            if type(d) in numericTypes:
                if int(d) == d:
                    nPossible[col] = max(int(d), nPossible[col])
                else:
                    nPossible[col] = -1
                    cols.remove(col)
            else:
                print('bye bye col %d: %s' % (col, repr(d)))
                nPossible[col] = -1
                cols.remove(col)

    return [int(x) + 1 for x in nPossible]
Exemplo n.º 5
0
def CalcNPossibleUsingMap(data, order, qBounds, nQBounds=None):
  """ calculates the number of possible values for each variable in a data set

   **Arguments**

     - data: a list of examples

     - order: the ordering map between the variables in _data_ and _qBounds_

     - qBounds: the quantization bounds for the variables

     - nQBounds: (optional) per-variable values; a variable whose entry is
       nonzero is not scanned and is reported as 0

   **Returns**

      a list with the number of possible values each variable takes on in the data set

   **Notes**

     - variables present in _qBounds_ will have their _nPossible_ number read
       from _qBounds_ (len(bounds) + 1)

     - _nPossible_ for other numeric variables will be calculated as the
       largest integral value found in the data plus one

     - a variable holding a non-numeric or non-integral value is reported as 0

     - the 'order:' / 'qB:' / 'bye bye col' diagnostics are printed to stdout

  """
  numericTypes = [int, float]
  try:
    # Python 2 only: `long` is a distinct integer type there
    numericTypes.append(long)  # noqa: F821
  except NameError:
    pass

  print('order:', order, len(order))
  print('qB:', qBounds)
  assert ((qBounds and len(order) == len(qBounds)) or
          (nQBounds and len(order) == len(nQBounds))), 'order/qBounds mismatch'
  nVars = len(order)
  nPossible = [-1] * nVars
  # cols lists the variables still to be scanned in the data; build a real
  # list, since a Python 3 range object does not support remove()
  cols = list(range(nVars))
  for i in range(nVars):
    if nQBounds and nQBounds[i] != 0:
      nPossible[i] = -1
      cols.remove(i)
    elif qBounds and len(qBounds[i]) > 0:
      # guard on qBounds: it may be None/empty when only nQBounds is given
      nPossible[i] = len(qBounds[i])
      cols.remove(i)

  for example in data:
    # iterate over a copy: columns are dropped from cols while scanning
    for col in cols[:]:
      d = example[order[col]]
      # exact type match on purpose (e.g. bool is not treated as numeric)
      if type(d) in numericTypes:
        if int(d) == d:
          nPossible[col] = max(int(d), nPossible[col])
        else:
          nPossible[col] = -1
          cols.remove(col)
      else:
        print('bye bye col %d: %s' % (col, repr(d)))
        nPossible[col] = -1
        cols.remove(col)

  return [int(x) + 1 for x in nPossible]
Exemplo n.º 6
0
 def testOneSplit3(self):
   """ optimal division not possible
   """
   # (variable value, result code) pairs
   d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1), (2.2, 1),
        (2.3, 1)]
   varValues, resCodes = zip(*d)
   nPossibleRes = 3
   res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes)
   target = (1.3, 0.88129)
   # self.assertEqual instead of a bare assert: bare asserts are removed under
   # "python -O", which would let this test pass without checking anything
   self.assertEqual(
     list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1],
     'result comparison failed: %s != %s' % (res, target))
Exemplo n.º 7
0
 def testOneSplit2_noise(self):
   # noisy data: the two samples at 1.4 carry different result codes
   samples = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 0), (1.4, 1),
              (1.6, 0), (2., 1), (2.1, 1), (2.2, 1), (2.3, 1)]
   varValues = tuple(pt[0] for pt in samples)
   resCodes = tuple(pt[1] for pt in samples)
   found = Quantize.FindVarQuantBound(varValues, resCodes, 2)
   expected = (1.8, 0.60999)

   def isClose(got, want):
     # element-wise comparison with the 1e-4 tolerance used by this test
     return Quantize.feq(got, want, 1e-4)

   matches = list(map(isClose, found, expected))
   self.assertEqual(matches, [1, 1],
                    'result comparison failed: %s != %s' % (found, expected))
Exemplo n.º 8
0
 def testOneSplit4(self):
   """ lots of duplicates
   """
   # (variable value, result code) pairs; several values are repeated
   d = [(1., 0), (1.1, 0), (1.2, 0), (1.2, 1), (1.4, 0), (1.4, 0), (1.6, 0), (2., 1), (2.1, 1),
        (2.1, 1), (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1)]
   varValues, resCodes = zip(*d)
   nPossibleRes = 2
   res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes)
   target = (1.8, 0.68939)
   # self.assertEqual instead of a bare assert: bare asserts are removed under
   # "python -O", which would let this test pass without checking anything
   self.assertEqual(
     list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1],
     'result comparison failed: %s != %s' % (res, target))
Exemplo n.º 9
0
 def testOneSplit5(self):
   """ same as testOneSplit1 data, but out of order
   """
   # (variable value, result code) pairs, deliberately not sorted by value
   d = [(1., 0), (1.1, 0), (2.2, 1), (1.2, 0), (1.6, 0), (1.4, 0), (2., 1), (2.1, 1), (1.4, 0),
        (2.3, 1)]
   varValues, resCodes = zip(*d)
   nPossibleRes = 2
   res = Quantize.FindVarQuantBound(varValues, resCodes, nPossibleRes)
   target = (1.8, 0.97095)
   # self.assertEqual instead of a bare assert: bare asserts are removed under
   # "python -O", which would let this test pass without checking anything
   self.assertEqual(
     list(map(lambda x, y: Quantize.feq(x, y, 1e-4), res, target)), [1, 1],
     'result comparison failed: %s != %s' % (res, target))
Exemplo n.º 10
0
 def testOneSplit2_noise(self):
     # noisy labels: value 1.4 appears once with code 0 and once with code 1
     varValues, resCodes = zip(
         (1., 0), (1.1, 0), (1.2, 0), (1.4, 0), (1.4, 1),
         (1.6, 0), (2., 1), (2.1, 1), (2.2, 1), (2.3, 1))
     res = Quantize.FindVarQuantBound(varValues, resCodes, 2)
     target = (1.8, 0.60999)
     # one flag per (result, target) pair, compared with a 1e-4 tolerance
     flags = list(
         map(lambda got, want: Quantize.feq(got, want, 1e-4), res, target))
     failMsg = 'result comparison failed: %s != %s' % (res, target)
     self.assertEqual(flags, [1, 1], failMsg)
Exemplo n.º 11
0
 def testMultSplit2(self):
   """ same test as testMultSplit1, but out of order
   """
   # (variable value, result code) pairs, deliberately not sorted by value
   d = [(1., 0), (2.1, 1), (1.1, 0), (1.2, 0), (1.4, 2), (1.6, 2), (2., 2), (1.4, 2), (2.1, 1),
        (2.2, 1), (2.1, 1), (2.3, 1)]
   varValues, resCodes = zip(*d)
   nPossibleRes = 3
   res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes)
   # target is (expected bounds, expected gain), compared with a 1e-4 tolerance
   target = ([1.3, 2.05], 1.55458)
   # TestCase assertion helpers are used instead of bare asserts, which are
   # stripped when Python runs with -O and would make the test pass vacuously
   self.assertTrue(
     Quantize.feq(res[1], target[1], 1e-4),
     'InfoGain comparison failed: %s != %s' % (res[1], target[1]))
   self.assertEqual(
     min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1,
     'split bound comparison failed: %s != %s' % (res[0], target[0]))
Exemplo n.º 12
0
 def testMultSplit5(self):
   """ dual valued, with an island, a bit noisy
   """
   # (variable value, result code) pairs
   d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0),
        (2.1, 0), (2.2, 1), (2.3, 0)]
   varValues, resCodes = zip(*d)
   nPossibleRes = 2
   res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes)
   # target is (expected bounds, expected gain), compared with a 1e-4 tolerance
   target = ([1.3, 2.05], .34707)
   # TestCase assertion helpers are used instead of bare asserts, which are
   # stripped when Python runs with -O and would make the test pass vacuously
   self.assertTrue(
     Quantize.feq(res[1], target[1], 1e-4),
     'InfoGain comparison failed: %s != %s' % (res[1], target[1]))
   self.assertEqual(
     min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1,
     'split bound comparison failed: %s != %s' % (res[0], target[0]))
Exemplo n.º 13
0
 def testMultSplit3(self):
   """  4 possible results
   """
   # (variable value, result code) pairs
   d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2), (2., 2), (2.1, 1), (2.1, 1),
        (2.1, 1), (2.2, 1), (2.3, 1), (3.0, 3), (3.1, 3), (3.2, 3), (3.3, 3)]
   varValues, resCodes = zip(*d)
   nPossibleRes = 4
   res = Quantize.FindVarMultQuantBounds(varValues, 3, resCodes, nPossibleRes)
   # target is (expected bounds, expected gain), compared with a 1e-4 tolerance
   target = ([1.30, 2.05, 2.65], 1.97722)
   # TestCase assertion helpers are used instead of bare asserts, which are
   # stripped when Python runs with -O and would make the test pass vacuously
   self.assertTrue(
     Quantize.feq(res[1], target[1], 1e-4),
     'InfoGain comparison failed: %s != %s' % (res[1], target[1]))
   self.assertEqual(
     min(map(lambda x, y: Quantize.feq(x, y, 1e-4), res[0], target[0])), 1,
     'split bound comparison failed: %s != %s' % (res[0], target[0]))
Exemplo n.º 14
0
 def testMultSplit4_dualValued_island(self):
   # two result codes; the code-1 samples sit as an island between runs of code 0
   samples = [(1., 0), (1.1, 0), (1.2, 0),
              (1.4, 1), (1.4, 1), (1.6, 1), (2., 1),
              (2.1, 0), (2.1, 0), (2.1, 0), (2.2, 0), (2.3, 0)]
   varValues = tuple(value for value, _ in samples)
   resCodes = tuple(code for _, code in samples)
   res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, 2)
   expectedBounds, expectedGain = [1.3, 2.05], .91830

   def isClose(got, want):
     # comparison with the 1e-4 tolerance used throughout this test
     return Quantize.feq(got, want, 1e-4)

   # the gain is checked first, then every split bound
   gainMsg = 'InfoGain comparison failed: %s != %s' % (res[1], expectedGain)
   self.assertTrue(isClose(res[1], expectedGain), gainMsg)
   worstBound = min(map(isClose, res[0], expectedBounds))
   self.assertEqual(worstBound, 1,
                    'split bound comparison failed: %s != %s' % (res[0], expectedBounds))
Exemplo n.º 15
0
 def testMultSplit1_simple_dual(self):
   # three result codes laid out in three consecutive runs; two bounds requested
   samples = [(1., 0), (1.1, 0), (1.2, 0),
              (1.4, 2), (1.4, 2), (1.6, 2), (2., 2),
              (2.1, 1), (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1)]
   varValues = tuple(value for value, _ in samples)
   resCodes = tuple(code for _, code in samples)
   res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, 3)
   expectedBounds, expectedGain = [1.3, 2.05], 1.55458

   def isClose(got, want):
     # comparison with the 1e-4 tolerance used throughout this test
     return Quantize.feq(got, want, 1e-4)

   # the split bounds are checked first, then the gain
   worstBound = min(map(isClose, res[0], expectedBounds))
   self.assertEqual(worstBound, 1,
                    'split bound comparison failed: %s != %s' % (res[0], expectedBounds))
   gainMsg = 'InfoGain comparison failed: %s != %s' % (res[1], expectedGain)
   self.assertTrue(isClose(res[1], expectedGain), gainMsg)
Exemplo n.º 16
0
 def testMultSplit1_simple_dual(self):
     # simple case: result codes 0, 2 and 1 occupy three consecutive runs
     varValues, resCodes = zip(
         (1., 0), (1.1, 0), (1.2, 0), (1.4, 2), (1.4, 2), (1.6, 2),
         (2., 2), (2.1, 1), (2.1, 1), (2.1, 1), (2.2, 1), (2.3, 1))
     res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, 3)
     target = ([1.3, 2.05], 1.55458)
     tol = 1e-4
     # bounds first: the smallest per-bound flag must still be 1
     boundFlags = map(lambda got, want: Quantize.feq(got, want, tol),
                      res[0], target[0])
     boundMsg = 'split bound comparison failed: %s != %s' % (res[0],
                                                             target[0])
     self.assertEqual(min(boundFlags), 1, boundMsg)
     # then the gain value
     gainMsg = 'InfoGain comparison failed: %s != %s' % (res[1], target[1])
     self.assertTrue(Quantize.feq(res[1], target[1], tol), gainMsg)
Exemplo n.º 17
0
 def testMultSplit4_dualValued_island(self):
     # dual valued: a run of code-1 samples is surrounded by code-0 samples
     varValues, resCodes = zip(
         (1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 1), (1.6, 1),
         (2., 1), (2.1, 0), (2.1, 0), (2.1, 0), (2.2, 0), (2.3, 0))
     res = Quantize.FindVarMultQuantBounds(varValues, 2, resCodes, 2)
     target = ([1.3, 2.05], .91830)
     tol = 1e-4
     # gain first
     gainMsg = 'InfoGain comparison failed: %s != %s' % (res[1], target[1])
     self.assertTrue(Quantize.feq(res[1], target[1], tol), gainMsg)
     # then the bounds: the smallest per-bound flag must still be 1
     boundFlags = map(lambda got, want: Quantize.feq(got, want, tol),
                      res[0], target[0])
     boundMsg = 'split bound comparison failed: %s != %s' % (res[0],
                                                             target[0])
     self.assertEqual(min(boundFlags), 1, boundMsg)
Exemplo n.º 18
0
def WriteData(outFile, varNames, qBounds, examples):
    """ writes out a .qdat file

    **Arguments**

      - outFile: a file object

      - varNames: a list of variable names

      - qBounds: the list of quantization bounds (should be the same length
         as _varNames_; a shorter list raises IndexError)

      - examples: the data to be written

    **Notes**

      - the header (title, variable table) is written as '#' comment lines

      - each example is written on its own line, its fields converted with
        str() and separated by single spaces

    """
    outFile.write('# Quantized data from DataUtils\n')
    outFile.write('# ----------\n')
    outFile.write('# Variable Table\n')
    # enumerate() replaces the xrange(len(...)) index loop (xrange does not
    # exist on Python 3); qBounds is still indexed so that a too-short list
    # fails loudly instead of being silently truncated
    for i, varName in enumerate(varNames):
        outFile.write('# %s %s\n' % (varName, str(qBounds[i])))
    outFile.write('# ----------\n')
    for example in examples:
        outFile.write(' '.join(map(str, example)) + '\n')
Exemplo n.º 19
0
def WriteData(outFile, varNames, qBounds, examples):
  """ writes out a .qdat file

    **Arguments**

      - outFile: a file object

      - varNames: a list of variable names

      - qBounds: the list of quantization bounds (should be the same length
         as _varNames_; a shorter list raises IndexError)

      - examples: the data to be written

    **Notes**

      - the header (title, variable table) is written as '#' comment lines

      - each example is written on its own line, its fields converted with
        str() and separated by single spaces

  """
  outFile.write('# Quantized data from DataUtils\n')
  outFile.write('# ----------\n')
  outFile.write('# Variable Table\n')
  # enumerate() replaces the xrange(len(...)) index loop (xrange does not
  # exist on Python 3); qBounds is still indexed so that a too-short list
  # fails loudly instead of being silently truncated
  for i, varName in enumerate(varNames):
    outFile.write('# %s %s\n' % (varName, str(qBounds[i])))
  outFile.write('# ----------\n')
  for example in examples:
    outFile.write(' '.join(map(str, example)) + '\n')
Exemplo n.º 20
0

# Bind the module-level helper names to one of two implementations: the
# _NewPy* functions when hascQuantize is false, the ones exposed by cQuantize
# otherwise.  (Debug aid: uncomment the next line to force the _NewPy* ones.)
#hascQuantize=0
if not hascQuantize:
    _RecurseOnBounds = _NewPyRecurseOnBounds
    _FindStartPoints = _NewPyFindStartPoints
else:
    _RecurseOnBounds = cQuantize._RecurseOnBounds
    _FindStartPoints = cQuantize._FindStartPoints

# Ad-hoc demo: runs only when the module is executed as a script.
if __name__ == '__main__':
    # NOTE(review): in the visible part of this block, sys is referenced only
    # by the commented-out sys.exit(1) below
    import sys
    # `if 1:` hard-wires the first demo; the else branch is never executed
    if 1:
        # (variable value, result code) pairs
        d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1),
             (2., 1), (2.1, 0), (2.1, 0), (2.1, 0), (2.2, 1), (2.3, 0)]
        varValues = list(map(lambda x: x[0], d))
        resCodes = list(map(lambda x: x[1], d))
        nPossibleRes = 2
        res = FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes)
        print('RES:', res)
        # presumably the value res is expected to match; it is not compared
        # against res in the visible part of this block
        target = ([1.3, 2.05], .34707)
    else:
        # same data as above, but with 1 instead of 2 as the second argument
        d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1),
             (2., 1), (2.1, 0), (2.1, 0), (2.1, 0), (2.2, 1), (2.3, 0)]
        varValues = list(map(lambda x: x[0], d))
        resCodes = list(map(lambda x: x[1], d))
        nPossibleRes = 2
        res = FindVarMultQuantBounds(varValues, 1, resCodes, nPossibleRes)
        print(res)
        #sys.exit(1)
        # d is rebound to a two-sample data set; nothing in the visible part
        # of this block uses it afterwards
        d = [(1.4, 1), (1.4, 0)]
Exemplo n.º 21
0
  return quantBounds, maxGain

# Bind the module-level helper names to one of two implementations: the
# _NewPy* functions when hascQuantize is false, the ones exposed by cQuantize
# otherwise.  (Debug aid: uncomment the next line to force the _NewPy* ones.)
# hascQuantize=0
if not hascQuantize:
  _RecurseOnBounds = _NewPyRecurseOnBounds
  _FindStartPoints = _NewPyFindStartPoints
else:
  _RecurseOnBounds = cQuantize._RecurseOnBounds
  _FindStartPoints = cQuantize._FindStartPoints

# Ad-hoc demo: runs only when the module is executed as a script.
if __name__ == '__main__':
  # `if 1:` hard-wires the first demo; the else branch is never executed
  if 1:
    # (variable value, result code) pairs
    d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0),
         (2.1, 0), (2.2, 1), (2.3, 0)]
    varValues = list(map(lambda x: x[0], d))
    resCodes = list(map(lambda x: x[1], d))
    nPossibleRes = 2
    res = FindVarMultQuantBounds(varValues, 2, resCodes, nPossibleRes)
    print('RES:', res)
    # presumably the value res is expected to match; it is not compared
    # against res in the visible part of this block
    target = ([1.3, 2.05], .34707)
  else:
    # same data as above, but with 1 instead of 2 as the second argument
    d = [(1., 0), (1.1, 0), (1.2, 0), (1.4, 1), (1.4, 0), (1.6, 1), (2., 1), (2.1, 0), (2.1, 0),
         (2.1, 0), (2.2, 1), (2.3, 0)]
    varValues = list(map(lambda x: x[0], d))
    resCodes = list(map(lambda x: x[1], d))
    nPossibleRes = 2
    res = FindVarMultQuantBounds(varValues, 1, resCodes, nPossibleRes)
    print(res)
    # sys.exit(1)
    # d is rebound to a two-sample data set; nothing in the visible part of
    # this block uses it afterwards
    d = [(1.4, 1), (1.4, 0)]