示例#1
0
  def __init__(self, params, dataset, weightSVM = True, callback = None, pool = None, looDist = 1.1):
    """Trains one model for every pair of labels in the dataset, doing model selection when more than one set of parameters is supplied.

    params - either a single Params object or an iterable (list, iterator, generator...) of Params objects. When several are given each of them is tried for each label pairing, and the result with the largest first entry (model[0], the value used for selection) is kept.
    dataset - provides the labels, via getLabels(), and the training data for a pair of labels, via getTrainData(negLabel, posLabel).
    weightSVM - if True (the default) the leave one out scores calculated during model selection are used to weight the classification boundaries, which can result in slightly better behaviour where multiple classes meet in feature space. This method only records the flag, as self.weightSVM.
    callback - optional progress function, called as callback(done, total).
    pool - None or False to solve everything in the current process; True to create a multiprocessing pool for the duration of this call (it is shut down before returning); or an existing pool object, which is used and then left running for the caller.
    looDist - passed through unchanged to looPairRange.

    In multiprocess mode an exception raised by a worker is re-raised here once all the submitted jobs have finished, rather than being silently dropped."""
    self.weightSVM = weightSVM

    # Get a list of labels and a mapping from each label to its index - models are keyed by pairs of these indices...
    self.labels = dataset.getLabels()
    self.labelToNum = dict((label,i) for i,label in enumerate(self.labels))

    # One slot for each unordered pair of labels, always with lNeg < lPos...
    self.models = dict()
    labelCount = len(self.labels)
    for lNeg in range(labelCount):
      for lPos in range(lNeg+1, labelCount):
        self.models[(lNeg,lPos)] = None

    # Materialise the candidate parameters once - a one-shot iterator would otherwise be used up by the first pairing, leaving every other pairing without a model...
    if isinstance(params, Params):
      candidates = [params]
    else:
      candidates = list(params)

    # Generate the list of models that need solving...
    solveList = [(lNeg,lPos,p) for lNeg,lPos in self.models.keys() for p in candidates]
    total = len(solveList)

    def keepBest(lNeg, lPos, model):
      # Model selection: keep the result with the largest first entry seen so far for this pairing.
      best = self.models[lNeg,lPos]
      if best is None or model[0]>best[0]:
        self.models[lNeg,lPos] = model

    # Loop through all models and solve them, reporting progress if required...
    if pool is None or pool is False:
      # Single process implementation...
      for i,(lNeg,lPos,p) in enumerate(solveList):
        if callback: callback(i,total)

        model = looPairRange(p, dataset.getTrainData(self.labels[lNeg], self.labels[lPos]), looDist)
        keepBest(lNeg, lPos, model)
    else:
      # Multiprocess implementation...

      # Create a pool if it hasn't been provided, remembering that it is ours to shut down...
      ownsPool = pool is True
      if ownsPool:
        pool = mp.Pool()

      try:
        # Callback for when each job completes - it only fires for jobs that succeed...
        self.numComplete = 0
        if callback: callback(self.numComplete,total)

        def taskComplete(ret):
          self.numComplete += 1
          if callback: callback(self.numComplete,total)
          keepBest(ret[0], ret[1], (ret[2],ret[3]))

        jobs = []
        try:
          # Create all the jobs, set them running...
          for lNeg,lPos,p in solveList:
            jobs.append(pool.apply_async(mpLooPairRange, (p, dataset.getTrainData(self.labels[lNeg], self.labels[lPos]), lNeg, lPos, looDist), callback = taskComplete))
        finally:
          # Wait for everything that was submitted to complete. Short timed waits are used rather than one unbounded wait so the loop keeps returning to Python code between waits...
          for job in jobs:
            while not job.ready():
              job.wait(0.1)

        # get() re-raises any exception that occurred in a worker, matching the single process path where such an exception propagates directly...
        for job in jobs:
          job.get()
      finally:
        # Only shut down a pool that was created here; one passed in belongs to the caller...
        if ownsPool:
          pool.terminate()
          pool.join()
示例#2
0
def mpLooPairRange(params, data, lNeg, lPos, looDist):
  """Module level wrapper around looPairRange, for use as a multiprocessing job. Runs looPairRange(params, data, looDist) and returns the first two entries of its result prefixed by the lNeg and lPos it was given, as (lNeg, lPos, result[0], result[1]), so the receiver can tell which label pairing the result belongs to."""
  result = looPairRange(params, data, looDist)
  first, second = result[0], result[1]
  return (lNeg, lPos, first, second)