示例#1
0
  def prepare(self, testSet, rand, numSamples=20, samplePercent=50):
    """Draws the bootstrap samples and sets up one ANN per sample.

    Every sample is obtained from testSet.sample() and handed to a freshly
    created ANN, which is prepared with a population size of 1. The samples,
    the ANNs and the size of the first sample are stored on self as
    sampleSets, anns and sampleSize.

    @param testSet: test set on which to test the ANN
    @type testSet: input.DataSet
    @param rand: source of randomness for bootstrap samples
    @type rand: random.Random
    @param numSamples: number of bootstrap samples
    @type numSamples: int
    @param samplePercent: size of each sample, in percent of the testSet
    @type samplePercent: int
    """
    samples = self.sampleSets = []
    nets = self.anns = []
    for _ in range(numSamples):
      # Draw the sample first, then build the ANN that is tied to it.
      drawn = testSet.sample(samplePercent, rand)
      samples.append(drawn)

      net = ANN()
      net.prepare(drawn, popSize=1)
      nets.append(net)

    # Only the first sample's size is recorded.
    firstSample = samples[0]
    self.sampleSize = firstSample.size
示例#2
0
def main():
  import input
  logging.basicConfig(level=logging.INFO, stream=sys.stdout)
  np.set_printoptions(precision=3, edgeitems=3, threshold=20)

  random.seed(80085) # used by the GA
  randSample = random.Random(input.SAMPLE_SEED) # used for data set sampling

  inp = input.Input("train3-std.tsv", randSample)
  print "Train set:",
  inp.trainSet.show()
  
  print "Test set:",
  inp.testSet.show()

  n = inp.trainSet.size * 20/100
  a = ANN()
  a.prepare(inp.trainSet, POPSIZE)
  
  tester = SampleTester()
  tester.prepare(inp.testSet, randSample)
  tester.showSampleSets()

  params = []
  generatePop(params)

  for genIndex in range(TOTAL_GENS):
    print "Generation", genIndex, "starting."
    logFP("Population", params)
    outputValues = a.evaluate(params, returnOutputs=True)
    
    logFP("Outputs", outputValues)
    
    thresholds = a.nlargest(n)
    logFP("Thresholds", thresholds)

    lifts = a.lift(n)
    logFP("Lifts", lifts)

    taggedParams = sorted(zip(lifts, params, range(len(params))),
                          key=lambda (l, p, i): l,
                          reverse=True)
    sortedParams = [p for l, p, i in taggedParams]
    logFP("Sorted pop", sortedParams)

    testLift, _ = tester.test(sortedParams[0])

    genplot.addGeneration(lifts, testLift, genIndex)

    mutateValue = 1.0
    params = generateGeneration(sortedParams, mutateValue)

  args = sys.argv[1:]
  if len(args) == 1:
    open(args[0], "w").write(repr(sortedParams[0]))

  genplot.plot()