Exemplo n.º 1
0
def main():
  import input
  logging.basicConfig(level=logging.INFO, stream=sys.stdout)
  np.set_printoptions(precision=3, edgeitems=3, threshold=20)

  random.seed(5108) # used by the GA
  randSample = random.Random(input.SAMPLE_SEED) # used for data set sampling

  inp = input.Input("train3-std.tsv", randSample)
  print "Train set:",
  inp.trainSet.show()
  
  print "Test set:",
  inp.testSet.show()

  n = inp.trainSet.size * 20/100
  a = ANN()
  a.prepare(inp.trainSet, POPSIZE)
  
  tester = SampleTester()
  tester.prepare(inp.testSet, randSample)
  tester.showSampleSets()

  params = []
  generatePop(params)
  mutateValue = 6.0

  for genIndex in range(5000):
    print "Generation", genIndex, "starting."
    logFP("Population", params)
    outputValues = a.evaluate(params, returnOutputs=True)
    
    logFP("Outputs", outputValues)
    
    thresholds = a.nlargest(n)
    logFP("Thresholds", thresholds)

    lifts = a.lift(n)
    logFP("Lifts", lifts)

    taggedParams = sorted(zip(lifts, params, range(len(params))),
                          key=lambda (l, p, i): l,
                          reverse=True)
    sortedParams = [p for l, p, i in taggedParams]
    logFP("Sorted pop", sortedParams)

    testLift, _ = tester.test(sortedParams[0])

    genplot.addGeneration(lifts, testLift, genIndex)
    
    params = generateGeneration(sortedParams, mutateValue)
    if genIndex%500 == 499:
        mutateValue -= 0.5

  args = sys.argv[1:]
  if len(args) == 1:
    open(args[0], "w").write(repr(sortedParams[0]))

  genplot.plot()