Exemplo n.º 1
0
  def output(self, partId, ch_aux):
    """Compute the output for one assignment part using the student code.

    Args:
      partId: Part number. Parts 1-2 exercise the edit model; parts 3-10 run
        spelling correction with a language model selected by part (3-4
        Laplace unigram, 5-6 Laplace bigram, 7-8 stupid backoff, 9-10 custom).
      ch_aux: Test input as a string. For parts 1-2 it is split on newlines
        and each stripped line is passed to ``EditModel.edits``; for the
        other parts it is loaded into a ``HolbrookCorpus`` via
        ``slurpString``.

    Returns:
      For parts 1-2, a JSON string holding, per input line, the list of
      ``(editedWord, rule())`` pairs produced by the edit model. For parts
      3-10, the string returned by ``correctCorpus`` with its first character
      replaced by a ``[["<partId>"],`` prefix. ``None`` (after printing a
      message) when ``partId`` is not one of the known parts.
    """
    # Reject unknown parts up front so that no corpus is loaded for a request
    # that cannot be answered anyway.
    if partId not in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10):
      # Format the id into the message so the offending value is visible.
      print('Unknown partId: %s' % (partId,))
      return None

    trainCorpus = HolbrookCorpus('../data/holbrook-tagged-train.dat')

    if partId in (1, 2):
      editModel = EditModel('../data/count_1edit.txt', trainCorpus)
      return json.dumps([[(e.editedWord, e.rule())
                          for e in editModel.edits(line.strip())]
                         for line in ch_aux.split("\n")])

    testCorpus = HolbrookCorpus()
    testCorpus.slurpString(ch_aux)

    if partId in (3, 4):
      lm = LaplaceUnigramLanguageModel(trainCorpus)
    elif partId in (5, 6):
      lm = LaplaceBigramLanguageModel(trainCorpus)
    elif partId in (7, 8):
      lm = StupidBackoffLanguageModel(trainCorpus)
    else:
      # Only parts 9 and 10 remain after the guard at the top.
      lm = CustomLanguageModel(trainCorpus)

    speller = SpellCorrect(lm, trainCorpus)
    output = speller.correctCorpus(testCorpus)
    # put in the part ID as well
    output = '[["%d"],%s' % (partId, output[1:])
    return output
Exemplo n.º 2
0
    def output(self, partId, ch_aux):
        """Compute the output for one assignment part using the student code.

        Args:
            partId: Part number. Parts 1-2 exercise the edit model; parts
                3-10 run spelling correction with a language model selected
                by part (3-4 Laplace unigram, 5-6 Laplace bigram, 7-8 stupid
                backoff, 9-10 custom).
            ch_aux: Test input as a string. For parts 1-2 it is split on
                newlines and each stripped line is passed to
                ``EditModel.edits``; for the other parts it is loaded into a
                ``HolbrookCorpus`` via ``slurpString``.

        Returns:
            For parts 1-2, a JSON string holding, per input line, the list
            of ``(editedWord, rule())`` pairs produced by the edit model.
            For parts 3-10, the string returned by ``correctCorpus`` with its
            first character replaced by a ``[["<partId>"],`` prefix.
            ``None`` (after printing a message) when ``partId`` is not one of
            the known parts.
        """
        # Reject unknown parts up front so that no corpus is loaded for a
        # request that cannot be answered anyway.
        if partId not in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10):
            # Format the id into the message so the offending value is
            # visible.
            print('Unknown partId: %s' % (partId,))
            return None

        trainCorpus = HolbrookCorpus('../data/holbrook-tagged-train.dat')

        if partId in (1, 2):
            editModel = EditModel('../data/count_1edit.txt', trainCorpus)
            return json.dumps([[(e.editedWord, e.rule())
                                for e in editModel.edits(line.strip())]
                               for line in ch_aux.split("\n")])

        testCorpus = HolbrookCorpus()
        testCorpus.slurpString(ch_aux)

        if partId in (3, 4):
            lm = LaplaceUnigramLanguageModel(trainCorpus)
        elif partId in (5, 6):
            lm = LaplaceBigramLanguageModel(trainCorpus)
        elif partId in (7, 8):
            lm = StupidBackoffLanguageModel(trainCorpus)
        else:
            # Only parts 9 and 10 remain after the guard at the top.
            lm = CustomLanguageModel(trainCorpus)

        speller = SpellCorrect(lm, trainCorpus)
        output = speller.correctCorpus(testCorpus)
        # put in the part ID as well
        output = '[["%d"],%s' % (partId, output[1:])
        return output