示例#1
0
    def setCorpus(self, corpus_id):
        ''' Make an existing remote corpus the current one.

            Fetches the corpus resource for corpus_id and records on this
            object both corpus_id and the id extracted from the corpus'
            "prompt" URL (self.corpus_id / self.prompt_id). '''
        corpus_url = lsrh.getCorpusURL(corpus_id)
        reply = self.session.get(corpus_url)
        # Only a 200 is accepted; how a mismatch is reported is up to lsrh.checkStatus.
        lsrh.checkStatus(reply, (200,), "Corpus id not found on remote server.")

        corpus_info = reply.json()
        prompt_url = corpus_info["prompt"]

        self.prompt_id = lsrh.getIdFromURL(prompt_url)
        self.corpus_id = corpus_id

        message = 'Set corpus to id {} for prompt {}'.format(self.corpus_id, self.prompt_id)
        print(message)
示例#2
0
    def newCorpus(self, prompt_id):
        ''' Create a new corpus for prompt_id on the remote server.

            Prompts the user for a description, POSTs the corpus as JSON, and
            stores prompt_id and the id of the created corpus on this object
            (self.prompt_id / self.corpus_id). '''
        corpus_description = raw_input("Description of corpus? ")
        payload = {
            "prompt": lsrh.getPromptURL(prompt_id),
            "description": corpus_description,
        }
        reply = self.session.post(
            lsrh.getCorpusURL(),
            data=json.dumps(payload),
            headers=self.headers,
        )
        # Only a 201 is accepted; how a mismatch is reported is up to lsrh.checkStatus.
        lsrh.checkStatus(
            reply,
            (201,),
            "Prompt id not found on remote server, or description was illegal.",
        )

        self.prompt_id = prompt_id
        self.corpus_id = lsrh.getIdFromResponse(reply)

        message = 'New corpus with id {} for prompt {}'.format(self.corpus_id, self.prompt_id)
        print(message)
示例#3
0
    def trainModel(self):
        ''' Train model based on the training answers for the current corpus.

            Creates a training task for self.corpus_id, starts it through the
            task's "process" URL, and waits for it with self.waitForTask.

            Returns url of trained_model (the "trained_model" field of the
            response handed back by waitForTask). '''
        # create the training task
        data = {"corpus": lsrh.getCorpusURL(self.corpus_id)}
        r = self.session.post(lsrh.getTrainingTaskURL(), data=json.dumps(data))
        lsrh.checkStatus(r, (201,), "Failed to create training task")

        trainingTaskData = r.json()

        # start the task, and make sure the start request was accepted before
        # waiting on it (same status codes uploadCorpus accepts for its
        # process request)
        r2 = self.session.post(trainingTaskData["process"])
        lsrh.checkStatus(r2, (200, 202), "Queueing of training task failed.")

        r3 = self.waitForTask(trainingTaskData["url"], "training task")

        return r3.json()["trained_model"]
示例#4
0
    def uploadCorpus(self, prompt_id, fname):
        ''' Create a new corpus of training answers, given the prompt_id and a file containing
            the corpus.

            Steps: create the corpus (self.newCorpus), upload fname through
            self.uploadToS3, create a corpus upload task that points at the
            uploaded key, start the task, and wait for it with
            self.waitForTask.

            Returns the "corpus" field of the response to the task's process
            (start) request. '''
        self.newCorpus(prompt_id)

        r = self.uploadToS3('corpus', fname)

        # the upload response carries the S3 key in a <key> element
        soup = bsoup(r.content)
        key = soup.find('key').get_text()

        data2 = {'corpus': lsrh.getCorpusURL(self.corpus_id), 's3_key': key, 'content_type': 'text/csv'}

        # create the upload task; check the creation succeeded before reading
        # the task's URLs out of the response (mirrors trainModel)
        r2 = self.session.post(lsrh.getCorpusUploadTaskURL(), data=json.dumps(data2))
        lsrh.checkStatus(r2, (201,), "Failed to create corpus upload task")
        corpusUploadTaskData = r2.json()

        # start the upload task
        r3 = self.session.post(corpusUploadTaskData["process"])
        lsrh.checkStatus(r3, (200, 202), "Queueing of corpus upload task failed.")

        # block until the task is done; its final response is not needed here
        self.waitForTask(corpusUploadTaskData["url"], "corpus upload task")

        # NOTE(review): the value is taken from the start response (r3), not
        # from the response returned by waitForTask - confirm this is intended.
        return r3.json()["corpus"]
示例#5
0
 def addTrainingAnswer(self, essay):
     ''' Post essay's text as a training answer for the current corpus.

         The id of the created answer is stored in self.trainingAnswerMap
         under essay.essay_id. '''
     data = {"corpus": lsrh.getCorpusURL(self.corpus_id), "text": essay.getText()}
     r = self.session.post(lsrh.getTrainingAnswerURL(), data=json.dumps(data))
     # verify the answer was created before extracting its id (as newCorpus
     # does for corpora)
     lsrh.checkStatus(r, (201,), "Failed to create training answer")
     self.trainingAnswerMap[essay.essay_id] = lsrh.getIdFromResponse(r)