Example #1
    def learn(self, trainExamples, validationExamples, loss, lossGradient,
              options):
        self.weights = util.Counter()
        random.seed(42)
        initStepSize = options.initStepSize
        stepSizeReduction = options.stepSizeReduction
        regularization = options.regularization

        # You should go over the training data numRounds times.
        # Each round, go through all the examples in some random order and update
        # the weights with respect to the gradient.
        for round in range(options.numRounds):
            random.shuffle(trainExamples)
            numUpdates = 0  # Should be incremented with each example and determines the step size.

            # Loop over the training examples and update the weights based on loss and regularization.
            # If your code runs slowly, try to explicitly write out the dot products
            # here (e.g., "for key, value in counter.items(): counter[key] += ---"
            # rather than "counter * other_vector").
            for x, y in trainExamples:
                numUpdates += 1
                stepSize = initStepSize / (numUpdates**stepSizeReduction)
                lossTerm = lossGradient(self.featureExtractor(x), y,
                                        self.weights) * stepSize
                if regularization != 0:
                    regTerm = self.weights * (regularization /
                                              len(trainExamples))
                    self.weights = self.weights - lossTerm - regTerm
                else:
                    self.weights = self.weights - lossTerm
            # Compute the objective function.
            # Here, we have split the objective function into two components:
            # the training loss, and the regularization penalty.
            # The objective function is the sum of these two values.
            trainLoss = 0  # Training loss
            regularizationPenalty = 0  # L2 Regularization penalty
            for x, y in trainExamples:
                trainLoss += loss(self.featureExtractor(x), y, self.weights)
            regularizationPenalty += (0.5 * regularization *
                                      (self.weights * self.weights))
            self.objective = trainLoss + regularizationPenalty

            # See how well we're doing on our actual goal (error rate).
            trainError = util.getClassificationErrorRate(
                trainExamples, self.predict, 'train', options.verbose,
                self.featureExtractor, self.weights)
            validationError = util.getClassificationErrorRate(
                validationExamples, self.predict, 'validation',
                options.verbose, self.featureExtractor, self.weights)

            print "Round %s/%s: objective = %.2f = %.2f + %.2f, train error = %.4f, validation error = %.4f" % (
                round + 1, options.numRounds, self.objective, trainLoss,
                regularizationPenalty, trainError, validationError)

        # Print out feature weights, largest first.
        with open('weights', 'w') as out:
            for f, v in sorted(self.weights.items(), key=lambda x: -x[1]):
                out.write("%s\t%s\n" % (f, v))
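
These examples lean on util.Counter, a dict-like sparse vector supplied by the course's util module, which must support a dot product (Counter * Counter), scaling by a number, and feature-wise subtraction. The real class ships with the assignment; a minimal stand-in covering just those operations (an assumption, not the course code) could look like this:

# A minimal sketch of a Counter-like sparse weight vector. The real
# util.Counter comes with the assignment; this stand-in is an assumption
# that only covers the operations the learn() methods above rely on.
class Counter(dict):
    def __missing__(self, key):
        return 0  # unseen features count as zero

    def __mul__(self, other):
        if isinstance(other, dict):
            # Dot product with another Counter; iterate the smaller one.
            small, big = (self, other) if len(self) <= len(other) else (other, self)
            return sum(v * big.get(k, 0) for k, v in small.items())
        # Otherwise scale every entry by a number.
        scaled = Counter()
        for k, v in self.items():
            scaled[k] = v * other
        return scaled

    def __sub__(self, other):
        result = Counter(self)
        for k, v in other.items():
            result[k] = result[k] - v
        return result

With this in place, weights * featureVector is a dot product, lossGradient(...) * stepSize scales the gradient, and weights - lossTerm - regTerm is the SGD step, which is everything the example above needs.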
Example #2
    def learn(self, trainExamples, validationExamples, loss, lossGradient,
              options):
        self.weights = util.Counter()
        random.seed(42)

        # You should go over the training data numRounds times.
        # Each round, go through all the examples in some random order and update
        # the weights with respect to the gradient.
        for r in range(options.numRounds):
            random.shuffle(trainExamples)
            numUpdates = 0  # Should be incremented with each example and determines the step size.

            # Loop over the training examples and update the weights based on loss and regularization.
            # If your code runs slowly, try to explicitly write out the dot products
            # here (e.g., "for key, value in counter.items(): counter[key] += ---"
            # rather than "counter * other_vector").
            for x, y in trainExamples:
                numUpdates += 1
                "*** YOUR CODE HERE (around 7 lines of code expected) ***"
                featureVector = self.featureExtractor(x)
                lossGrad = lossGradient(featureVector, y, self.weights)
                stepSize = options.initStepSize / (numUpdates**
                                                   options.stepSizeReduction)
                if (options.regularization > 0):
                    c = float(options.regularization) / len(trainExamples)
                    for fKey in self.weights.iterkeys():
                        if self.weights[fKey] != 0 or lossGrad[fKey] != 0:
                            self.weights[fKey] -= stepSize * (
                                lossGrad[fKey] + c * self.weights[fKey])
                else:
                    for fKey, fLoss in lossGrad.iteritems():
                        self.weights[fKey] -= stepSize * fLoss

            # Compute the objective function.
            # Here, we have split the objective function into two components:
            # the training loss, and the regularization penalty.
            # The objective function is the sum of these two values.
            trainLoss = 0  # Training loss
            regularizationPenalty = 0  # L2 Regularization penalty
            "*** YOUR CODE HERE (around 5 lines of code expected) ***"
            for x, y in trainExamples:
                featureVector = self.featureExtractor(x)
                trainLoss += loss(featureVector, y, self.weights)
            for weight in self.weights.itervalues():
                if weight > 0:
                    regularizationPenalty += weight**2
            regularizationPenalty *= 0.5 * options.regularization
            self.objective = trainLoss + regularizationPenalty

            # See how well we're doing on our actual goal (error rate).
            trainError = util.getClassificationErrorRate(
                trainExamples, self.predict, 'train', options.verbose,
                self.featureExtractor, self.weights)
            validationError = util.getClassificationErrorRate(
                validationExamples, self.predict, 'validation',
                options.verbose, self.featureExtractor, self.weights)

            print "Round %s/%s: objective = %.2f = %.2f + %.2f, train error = %.4f, validation error = %.4f" % (
                r + 1, options.numRounds, self.objective, trainLoss,
                regularizationPenalty, trainError, validationError)

        # Print out feature weights, largest first.
        with open('weights', 'w') as out:
            for f, v in sorted(self.weights.items(), key=lambda x: -x[1]):
                out.write("%s\t%s\n" % (f, v))
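
Examples #1 and #2 take loss and lossGradient as callables but never define one. For a binary linear classifier with labels y in {-1, +1}, a plausible hinge-loss pair matching the (featureVector, y, weights) signature used above (an assumption about the assignment's interface, not its actual code) is:

# Hinge loss for a linear classifier: max(0, 1 - y * (w . phi)).
# The assignment's real loss functions may differ; this sketch only
# matches the call signature the learn() methods above expect.
def hingeLoss(featureVector, y, weights):
    margin = y * (weights * featureVector)  # Counter dot product
    return max(0, 1 - margin)

def hingeLossGradient(featureVector, y, weights):
    gradient = util.Counter()
    if y * (weights * featureVector) < 1:  # only a violated margin contributes
        for f, v in featureVector.items():
            gradient[f] = -y * v  # gradient of (1 - y * (w . phi)) w.r.t. w
    return gradient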
Example #3
  def learn(self, trainExamples, validationExamples, trainKickingExamples, validationKickingExamples, loss, lossGradient, options):
    self.weights = util.Counter()
    random.seed(42)
    
    # You should go over the training data numRounds times.
    # Each round, go through all the examples in some random order and update
    # the weights with respect to the gradient.
    for round in range(options.numRounds):
      random.shuffle(trainExamples)
      numUpdates = 0  # Should be incremented with each example and determines the step size.
      trainingSize = len(trainExamples)
      # Loop over the training examples and update the weights based on loss and regularization.
      # If your code runs slowly, try to explicitly write out the dot products
      # here (e.g., "for key, value in counter.items(): counter[key] += ---"
      # rather than "counter * other_vector").
      for x, y in trainExamples:
        numUpdates += 1
        stepSize = options.initStepSize / (numUpdates**options.stepSizeReduction)
        featx = self.featureExtractor(x)
        gradient = lossGradient(featx, y, self.weights)
        if gradient.totalCount() != 0:
            updater = gradient * stepSize
            for key in updater:
                self.weights[key] -= updater[key]
        if options.regularization != 0:
            # Lazy L2 decay: only weights whose features appear in this
            # example's gradient are shrunk on this step.
            for key in gradient:
                self.weights[key] -= stepSize * self.weights[key] * (options.regularization / trainingSize)

      # Compute the objective function.
      # Here, we have split the objective function into two components:
      # the training loss, and the regularization penalty.
      # The objective function is the sum of these two values.
      trainLoss = 0  # Training loss
      regularizationPenalty = 0  # L2 Regularization penalty
      
      for x, y in trainExamples:
          featx = self.featureExtractor(x)
          trainLoss += loss(featx, y, self.weights)
      if options.regularization != 0:
          for key in self.weights:
              regularizationPenalty += self.weights[key]**2
          regularizationPenalty *= 0.5 * options.regularization
      self.objective = trainLoss + regularizationPenalty

      # See how well we're doing on our actual goal (error rate).
      trainError = util.getClassificationErrorRate(trainExamples, self.predict, trainKickingExamples, 'train', options.verbose, self.featureExtractor, self.weights)
      
      if options.single == 'no':
        validationError = util.getClassificationErrorRate(validationExamples, self.predict, validationKickingExamples, 'validation', options.verbose, self.featureExtractor, self.weights)

        print "Round %s/%s: objective = %.2f = %.2f + %.2f, train error = %.4f, validation error = %.4f" % (round+1, options.numRounds, self.objective, trainLoss, regularizationPenalty, trainError, validationError)

    # Print out feature weights, largest first.
    with open('weights', 'w') as out:
      for f, v in sorted(self.weights.items(), key=lambda x: -x[1]):
        out.write("%s\t%s\n" % (f, v))
    if options.single == 'yes' and len(validationExamples) != 0:
      print(self.weights * self.featureExtractor(validationExamples[0][0]))
      if self.predict(validationExamples[0][0]) == 1:
        print("pass")
      else:
        print("run")
    elif options.single == 'yes' and len(validationKickingExamples) != 0:
      quarter, time, down, dist, yrdline, play, awyscr, homescr, epb, epa, home, team = validationKickingExamples[0][0].split(",")
      prediction = "field goal"
      minute = time.split(":")
      scorediff = 0
      if home == "True":
        scorediff = int(homescr) - int(awyscr)
      else:
        scorediff = int(awyscr) - int(homescr)
      if int(minute[0]) <= 3 and quarter == "4":
        if -16 <= scorediff < -3:
          prediction = "pass"
        elif -3 <= scorediff < 0 and team in yrdline:
          prediction = "pass"
      else:
        if team in yrdline:
          prediction = "punt"
        else:
          yrdinfo = yrdline.split(" ")
          first = yrdinfo[0]
          if first == "50":
            prediction = "punt"
          elif int(yrdinfo[1]) >= 38:
            prediction = "punt"
      print(prediction)
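
None of the examples show how learn() is invoked. A hypothetical harness is sketched below; the option names come from the attributes the examples read, while the optparse choice, the classifier object, and the data lists are assumptions:

import optparse

parser = optparse.OptionParser()
parser.add_option('--numRounds', type='int', default=10)
parser.add_option('--initStepSize', type='float', default=1.0)
parser.add_option('--stepSizeReduction', type='float', default=0.5)
parser.add_option('--regularization', type='float', default=0.0)
parser.add_option('--verbose', action='store_true', default=False)
options, _ = parser.parse_args()

# classifier, trainExamples, and validationExamples are placeholders;
# the assignment builds them from its own data. This call matches the
# five-argument signature of Examples #1 and #2; Example #3 additionally
# takes the kicking-example lists and reads an options.single flag.
classifier.learn(trainExamples, validationExamples,
                 hingeLoss, hingeLossGradient, options)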