Python MaulProblem примеры использования

Язык программирования: Python

Пространство имен/Пакет: MaulProblem

Класс/Тип: MaulProblem

Примеров на hotexamples.com: 4

Python MaulProblem - 4 примера найдено. Это лучшие примеры Python кода для MaulProblem.MaulProblem, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

MaulProblem(1)

decide(1)

generateModel(1)

saveModel(1)

Пример #1

Показать файл

    def crossValidate(self,
                      category,
                      trainingProportion,
                      totalProportion,
                      constraints,
                      val='1'):
        """Load samples, train and save a model, then optionally validate it.

        Args:
            category: forwarded to loadSamples() and to the MaulProblem
                constructor.
            trainingProportion: forwarded to prepareTrainingData(); validation
                only runs when this is below 1.0.
            totalProportion: forwarded to prepareTrainingData().
            constraints: forwarded to loadSamples().
            val: validation is requested when this is 1 or '1' (the default).

        Returns:
            Whatever validate() returns, or None when validation is skipped.
        """
        # Load the samples
        self.loadSamples(category, constraints)

        # Select the training data
        self.prepareTrainingData(trainingProportion, totalProportion)

        # Make a decision problem
        self.problem = MaulProblem(category, self.params)

        # The model is regenerated and saved on every call; reuse of an
        # existing model (haveModel()/loadModel()) is intentionally not done.
        self.problem.generateModel(self.trainingData)
        self.problem.saveModel()

        # The default is the string '1', so accept both the string and the
        # integer form. Comparing against the int 1 only meant that the
        # default value never triggered validation.
        if val in (1, '1') and trainingProportion < 1.0:
            return self.validate()

        # Single-argument call form prints identically on Python 2 and 3.
        print("No validation, either requested none, or no test samples")
        return None

Пример #2

Показать файл

Файл: MaulDataset.py Проект: 904labs/maul

  def crossValidate(self, category, trainingProportion, totalProportion, constraints,val='1'):
    """Load samples, train and save a model, then optionally validate it.

    Args:
      category: forwarded to loadSamples() and to the MaulProblem constructor.
      trainingProportion: forwarded to prepareTrainingData(); validation only
        runs when this is below 1.0.
      totalProportion: forwarded to prepareTrainingData().
      constraints: forwarded to loadSamples().
      val: validation is requested when this is 1 or '1' (the default).

    Returns:
      Whatever validate() returns, or None when validation is skipped.
    """
    # Load the samples
    self.loadSamples(category, constraints)

    # Select the training data
    self.prepareTrainingData(trainingProportion, totalProportion)

    # Make a decision problem
    self.problem = MaulProblem(category, self.params)

    # The model is regenerated and saved on every call; reuse of an existing
    # model (haveModel()/loadModel()) is intentionally not done.
    self.problem.generateModel(self.trainingData)
    self.problem.saveModel()

    # The default is the string '1', so accept both the string and the integer
    # form. Comparing against the int 1 only meant that the default value
    # never triggered validation.
    if val in (1, '1') and trainingProportion < 1.0:
      return self.validate()

    # Single-argument call form prints identically on Python 2 and 3.
    print("No validation, either requested none, or no test samples")
    return None

Пример #3

Показать файл

Файл: MaulDataset.py Проект: 904labs/maul

class MaulDataset:
  """Labelled samples read from a SQLite database, split for training/validation.

  Attributes set by the methods below:
    params: problem parameters; ``params.dataType`` selects the sample format
      and the object is also passed to MaulProblem.
    dbName: path handed to ``sqlite3.connect``.
    samples: dict mapping label -> list of sample dicts with the keys
      "uaString", "tokens" and "label".
    trainingData: list of prepared ``(label, data)`` tuples.
    problem: the MaulProblem created by crossValidate().
  """

  # Constructor
  def __init__(self, dbName, params):
    """Remember the database path and the problem parameters (no I/O)."""
    self.params = params
    self.dbName = dbName

  # Standard Cross-Validation Routine
  def crossValidate(self, category, trainingProportion, totalProportion, constraints,val='1'):
    """Load samples, train and save a model, then optionally validate it.

    Args:
      category: column of the ``data`` table used as the label; also passed
        to the MaulProblem constructor.
      trainingProportion: fraction of each label's kept samples moved into
        the training set; validation only runs when this is below 1.0.
      totalProportion: fraction of each label's samples kept at all.
      constraints: SQL text appended after ``from data`` in the query.
      val: validation is requested when this is 1 or '1' (the default).

    Returns:
      The statistics dict from validate(), or None when validation is skipped.
    """
    # Load the samples
    self.loadSamples(category, constraints)

    # Select the training data
    self.prepareTrainingData(trainingProportion, totalProportion)

    # Make a decision problem
    self.problem = MaulProblem(category, self.params)

    # The model is regenerated and saved on every call; reuse of an existing
    # model (haveModel()/loadModel()) is intentionally not done.
    self.problem.generateModel(self.trainingData)
    self.problem.saveModel()

    # The default is the string '1', so accept both the string and the integer
    # form. Comparing against the int 1 only meant that the default value
    # never triggered validation.
    if val in (1, '1') and trainingProportion < 1.0:
      return self.validate()

    print("No validation, either requested none, or no test samples")
    return None

  # Helper routine to load the samples into a dictionary
  def loadSamples(self, category, constraints):
    """Query the database and group the rows by label into ``self.samples``.

    Args:
      category: name of the label column selected alongside uaString/Tokens.
      constraints: SQL text appended after ``from data`` (may be empty).
    """
    # NOTE(review): category and constraints are spliced into the SQL text
    # unescaped (a column name cannot be a bound parameter). Only pass
    # trusted values here.
    query = 'select uaString, Tokens, ' + category + ' from data ' + constraints

    conn = sqlite3.connect(self.dbName)
    try:
      conn.text_factory = str
      cursor = conn.cursor()
      cursor.execute(query)

      # Organize our samples by label
      self.samples = dict()
      for row in cursor:
        uaString, rawTokens, label = row[0], row[1], row[2]
        # split() without an argument tolerates repeated spaces and an empty
        # token string, where split(' ') would yield '' and break int().
        tokens = [int(s) for s in rawTokens.split()]
        self.samples.setdefault(label, []).append(
            {"uaString" : uaString, "tokens": tokens, "label" : label})
    finally:
      # Always release the connection, including when the query fails.
      conn.close()

  # Helper routine to pull some samples out into an array of training data
  def prepareTrainingData(self, trainingProportion, totalProportion):
    """Move a random share of every label's samples into ``self.trainingData``.

    Each label's list is shuffled in place, optionally truncated to
    ``totalProportion`` of its length, and then ``trainingProportion`` of what
    is left is popped off and stored in prepared form. Whatever remains in
    ``self.samples`` is the validation data.
    """
    self.trainingData = []
    print("Processing Samples...")
    for key in self.samples:
      bucket = self.samples[key]

      # Shuffle the data
      random.shuffle(bucket)

      # If we want to use less than our total amount of data (for speed),
      # shrink the list now
      if totalProportion < 1.0:
        del bucket[int(len(bucket) * totalProportion):]

      # Pick out training data
      numTrainingSamples = int(len(bucket) * trainingProportion)
      for _ in range(numTrainingSamples):
        self.trainingData.append(self.prepareSample(bucket.pop()))

      # str(key): labels come straight from the database and need not be
      # strings.
      print("Selected " + str(numTrainingSamples) + " samples of type " + str(key) +
            " for training, leaving " + str(len(bucket)) + " for validation")

  # Helper routine to put a sample in the format MaulSVM expects given
  # the problem parameters
  def prepareSample(self, sample):
    """Return ``(label, data)`` for a sample, shaped by ``params.dataType``.

    "string" gives the raw uaString, "tokens" the token list, and "vector" a
    list of ``(token, count)`` pairs sorted by token.

    Raises:
      ValueError: if ``params.dataType`` is none of the three known values.
    """
    dataType = self.params.dataType
    if dataType == "string":
      return (sample["label"], sample["uaString"])
    if dataType == "tokens":
      return (sample["label"], sample["tokens"])
    if dataType == "vector":
      vec = []
      for token in sorted(sample["tokens"]):
        # Compare with the last emitted pair instead of a -1 sentinel, which
        # popped from an empty list when the smallest token was -1.
        if vec and vec[-1][0] == token:
          vec[-1] = (token, vec[-1][1] + 1)
        else:
          vec.append((token, 1))
      return (sample["label"], vec)
    raise ValueError("Unknown datatype: " + str(dataType))

  # Validation routine
  def validate(self):
    """Run the model over the samples left in ``self.samples`` and report.

    Returns:
      dict with "correct" (float count), "total", "falsePositives" and
      "falseNegatives" (label -> count) and "misses" (list of messages).
    """
    # Statistics
    correct = 0.0
    misses = []
    falsePositives = dict.fromkeys(self.samples, 0)
    falseNegatives = dict.fromkeys(self.samples, 0)

    # Generate a flat list of the samples that were not used for training
    sampleList = []
    for key in self.samples:
      sampleList.extend(self.samples[key])

    # Iterate over each sample, comparing prediction with actual label
    for sample in sampleList:
      actual = sample['label']
      prediction = self.problem.decide(self.prepareSample(sample)[1])
      if prediction == actual:
        correct += 1
      else:
        # %-formatting copes with labels/predictions that are not strings.
        misses.append("Predicted: %s, Actual: %s, string: %s"
                      % (prediction, actual, sample['uaString']))
        # .get(): the prediction may be a label with no entry in samples.
        falsePositives[prediction] = falsePositives.get(prediction, 0) + 1
        falseNegatives[actual] = falseNegatives.get(actual, 0) + 1

    # With no validation samples report 0.0 instead of dividing by zero.
    accuracy = correct / len(sampleList) if sampleList else 0.0

    print("ACCURACY: "  + str(accuracy))
    print("False Positives: " + str(falsePositives))
    print("False Negatives: " + str(falseNegatives))
    print("Misses:")
    for line in misses:
      print(line)

    # Return statistics
    return {"correct" : correct,
            "total" : len(sampleList),
            "falsePositives" : falsePositives,
            "falseNegatives" : falseNegatives,
            "misses" : misses}

Пример #4

Показать файл

class MaulDataset:
    """Labelled samples from a SQLite database, split for training/validation.

    Attributes set by the methods below:
        params: problem parameters; ``params.dataType`` selects the sample
            format and the object is also passed to MaulProblem.
        dbName: path handed to ``sqlite3.connect``.
        samples: dict mapping label -> list of sample dicts with the keys
            "uaString", "tokens" and "label".
        trainingData: list of prepared ``(label, data)`` tuples.
        problem: the MaulProblem created by crossValidate().
    """

    # Constructor
    def __init__(self, dbName, params):
        """Remember the database path and the problem parameters (no I/O)."""
        self.params = params
        self.dbName = dbName

    # Standard Cross-Validation Routine
    def crossValidate(self,
                      category,
                      trainingProportion,
                      totalProportion,
                      constraints,
                      val='1'):
        """Load samples, train and save a model, then optionally validate it.

        Args:
            category: column of the ``data`` table used as the label; also
                passed to the MaulProblem constructor.
            trainingProportion: fraction of each label's kept samples moved
                into the training set; validation only runs when this is
                below 1.0.
            totalProportion: fraction of each label's samples kept at all.
            constraints: SQL text appended after ``from data`` in the query.
            val: validation is requested when this is 1 or '1' (the default).

        Returns:
            The statistics dict from validate(), or None when validation is
            skipped.
        """
        # Load the samples
        self.loadSamples(category, constraints)

        # Select the training data
        self.prepareTrainingData(trainingProportion, totalProportion)

        # Make a decision problem
        self.problem = MaulProblem(category, self.params)

        # The model is regenerated and saved on every call; reuse of an
        # existing model (haveModel()/loadModel()) is intentionally not done.
        self.problem.generateModel(self.trainingData)
        self.problem.saveModel()

        # The default is the string '1', so accept both the string and the
        # integer form. Comparing against the int 1 only meant that the
        # default value never triggered validation.
        if val in (1, '1') and trainingProportion < 1.0:
            return self.validate()

        print("No validation, either requested none, or no test samples")
        return None

    # Helper routine to load the samples into a dictionary
    def loadSamples(self, category, constraints):
        """Query the database and group the rows by label in ``self.samples``.

        Args:
            category: name of the label column selected alongside
                uaString/Tokens.
            constraints: SQL text appended after ``from data`` (may be empty).
        """
        # NOTE(review): category and constraints are spliced into the SQL
        # text unescaped (a column name cannot be a bound parameter). Only
        # pass trusted values here.
        query = ('select uaString, Tokens, ' + category + ' from data ' +
                 constraints)

        conn = sqlite3.connect(self.dbName)
        try:
            conn.text_factory = str
            cursor = conn.cursor()
            cursor.execute(query)

            # Organize our samples by label
            self.samples = dict()
            for row in cursor:
                uaString, rawTokens, label = row[0], row[1], row[2]
                # split() without an argument tolerates repeated spaces and
                # an empty token string, where split(' ') would yield '' and
                # break int().
                tokens = [int(s) for s in rawTokens.split()]
                self.samples.setdefault(label, []).append({
                    "uaString": uaString,
                    "tokens": tokens,
                    "label": label
                })
        finally:
            # Always release the connection, including when the query fails.
            conn.close()

    # Helper routine to pull some samples out into an array of training data
    def prepareTrainingData(self, trainingProportion, totalProportion):
        """Move a random share of every label's samples into trainingData.

        Each label's list is shuffled in place, optionally truncated to
        ``totalProportion`` of its length, and then ``trainingProportion`` of
        what is left is popped off and stored in prepared form. Whatever
        remains in ``self.samples`` is the validation data.
        """
        self.trainingData = []
        print("Processing Samples...")
        for key in self.samples:
            bucket = self.samples[key]

            # Shuffle the data
            random.shuffle(bucket)

            # If we want to use less than our total amount of data (for
            # speed), shrink the list now
            if totalProportion < 1.0:
                del bucket[int(len(bucket) * totalProportion):]

            # Pick out training data
            numTrainingSamples = int(len(bucket) * trainingProportion)
            for _ in range(numTrainingSamples):
                self.trainingData.append(self.prepareSample(bucket.pop()))

            # str(key): labels come straight from the database and need not
            # be strings.
            print("Selected " + str(numTrainingSamples) +
                  " samples of type " + str(key) +
                  " for training, leaving " + str(len(bucket)) +
                  " for validation")

    # Helper routine to put a sample in the format MaulSVM expects given
    # the problem parameters
    def prepareSample(self, sample):
        """Return ``(label, data)`` for a sample, shaped by params.dataType.

        "string" gives the raw uaString, "tokens" the token list, and
        "vector" a list of ``(token, count)`` pairs sorted by token.

        Raises:
            ValueError: if ``params.dataType`` is none of the known values.
        """
        dataType = self.params.dataType
        if dataType == "string":
            return (sample["label"], sample["uaString"])
        if dataType == "tokens":
            return (sample["label"], sample["tokens"])
        if dataType == "vector":
            vec = []
            for token in sorted(sample["tokens"]):
                # Compare with the last emitted pair instead of a -1
                # sentinel, which popped from an empty list when the
                # smallest token was -1.
                if vec and vec[-1][0] == token:
                    vec[-1] = (token, vec[-1][1] + 1)
                else:
                    vec.append((token, 1))
            return (sample["label"], vec)
        raise ValueError("Unknown datatype: " + str(dataType))

    # Validation routine
    def validate(self):
        """Run the model over the samples left in ``self.samples`` and report.

        Returns:
            dict with "correct" (float count), "total", "falsePositives" and
            "falseNegatives" (label -> count) and "misses" (list of messages).
        """
        # Statistics
        correct = 0.0
        misses = []
        falsePositives = dict.fromkeys(self.samples, 0)
        falseNegatives = dict.fromkeys(self.samples, 0)

        # Generate a flat list of the samples that were not used for training
        sampleList = []
        for key in self.samples:
            sampleList.extend(self.samples[key])

        # Iterate over each sample, comparing prediction with actual label
        for sample in sampleList:
            actual = sample['label']
            prediction = self.problem.decide(self.prepareSample(sample)[1])
            if prediction == actual:
                correct += 1
            else:
                # %-formatting copes with labels/predictions that are not
                # strings.
                misses.append("Predicted: %s, Actual: %s, string: %s" %
                              (prediction, actual, sample['uaString']))
                # .get(): the prediction may be a label with no entry in
                # samples.
                falsePositives[prediction] = (
                    falsePositives.get(prediction, 0) + 1)
                falseNegatives[actual] = falseNegatives.get(actual, 0) + 1

        # With no validation samples report 0.0 instead of dividing by zero.
        accuracy = correct / len(sampleList) if sampleList else 0.0

        print("ACCURACY: " + str(accuracy))
        print("False Positives: " + str(falsePositives))
        print("False Negatives: " + str(falseNegatives))
        print("Misses:")
        for line in misses:
            print(line)

        # Return statistics
        return {
            "correct": correct,
            "total": len(sampleList),
            "falsePositives": falsePositives,
            "falseNegatives": falseNegatives,
            "misses": misses
        }