def setCorpus(self, corpus_id):
    '''
    Point this client at an existing corpus on the remote server.

    Fetches the record for corpus_id and validates the response with
    lsrh.checkStatus (200 is the only accepted status).  The "prompt" field
    of the returned JSON is passed through lsrh.getIdFromURL to obtain the
    prompt id.  On success self.prompt_id and self.corpus_id are updated
    and a one-line summary is printed.
    '''
    reply = self.session.get(lsrh.getCorpusURL(corpus_id))
    lsrh.checkStatus(reply, (200,), "Corpus id not found on remote server.")

    # Only touch instance state after the status check has passed.
    prompt_ref = reply.json()["prompt"]
    self.prompt_id = lsrh.getIdFromURL(prompt_ref)
    self.corpus_id = corpus_id

    summary = 'Set corpus to id {} for prompt {}'.format(self.corpus_id, self.prompt_id)
    print(summary)
def newCorpus(self, prompt_id):
    '''
    Create a new corpus for prompt_id on the remote server.

    Reads the corpus description interactively from stdin, POSTs the
    prompt URL and description as JSON (with self.headers) and validates
    the response with lsrh.checkStatus (201 is the only accepted status).
    On success self.prompt_id and self.corpus_id are updated and a one-line
    summary is printed.
    '''
    description = raw_input("Description of corpus? ")
    payload = {
        "prompt": lsrh.getPromptURL(prompt_id),
        "description": description,
    }

    reply = self.session.post(
        lsrh.getCorpusURL(),
        data=json.dumps(payload),
        headers=self.headers,
    )
    lsrh.checkStatus(
        reply,
        (201,),
        "Prompt id not found on remote server, or description was illegal.",
    )

    # The new corpus id is extracted from the creation response.
    self.prompt_id = prompt_id
    self.corpus_id = lsrh.getIdFromResponse(reply)
    print('New corpus with id {} for prompt {}'.format(self.corpus_id, self.prompt_id))
def trainModel(self):
    '''
    Train model based on the training answers for the current corpus.

    Creates a training task for self.corpus_id, POSTs to the task's
    "process" URL to start it, then waits for the task via
    self.waitForTask.  Both the creation response and the start response
    are validated with lsrh.checkStatus.

    Returns url of trained_model (the "trained_model" field of the JSON
    returned by the response that waitForTask hands back).
    '''
    # create the training task
    data = {"corpus": lsrh.getCorpusURL(self.corpus_id)}
    r = self.session.post(lsrh.getTrainingTaskURL(), data=json.dumps(data))
    lsrh.checkStatus(r, (201,), "Failed to create training task")
    trainingTaskData = r.json()

    # start the task; check that the request was accepted, as uploadCorpus
    # does for its own "process" request, so a rejected start is reported
    # here instead of going on to wait for the task
    queued = self.session.post(trainingTaskData["process"])
    lsrh.checkStatus(queued, (200, 202), "Queueing of training task failed.")

    finished = self.waitForTask(trainingTaskData["url"], "training task")
    return finished.json()["trained_model"]
def uploadCorpus(self, prompt_id, fname):
    '''
    Create a new corpus of training answers, given the prompt_id and a file
    containing the corpus.

    Steps: create the corpus with self.newCorpus, upload fname with
    self.uploadToS3, read the text of the 'key' element from the upload
    response, create a corpus upload task for that key, start it through
    the task's "process" URL and wait for it with self.waitForTask.

    Returns the "corpus" field of the JSON from the response to the
    "process" request.
    '''
    self.newCorpus(prompt_id)

    s3_reply = self.uploadToS3('corpus', fname)
    s3_key = bsoup(s3_reply.content).find('key').get_text()

    task_spec = {
        'corpus': lsrh.getCorpusURL(self.corpus_id),
        's3_key': s3_key,
        'content_type': 'text/csv',
    }

    # create and start upload task
    # NOTE(review): the creation response is used without a status check,
    # unlike the creation step in trainModel -- confirm whether one is wanted.
    created = self.session.post(
        lsrh.getCorpusUploadTaskURL(),
        data=json.dumps(task_spec),
    )
    task = created.json()
    queued = self.session.post(task["process"])
    lsrh.checkStatus(queued, (200, 202), "Queueing of corpus upload task failed.")

    # The response handed back by waitForTask is not used; the return value
    # below comes from the "process" response.
    self.waitForTask(task["url"], "corpus upload task")
    return queued.json()["corpus"]
def addTrainingAnswer(self, essay):
    '''
    Post one essay as a training answer in the current corpus.

    Sends essay.getText() together with the URL of self.corpus_id as JSON,
    then stores the id extracted from the response in
    self.trainingAnswerMap under essay.essay_id.
    '''
    payload = {
        "corpus": lsrh.getCorpusURL(self.corpus_id),
        "text": essay.getText(),
    }
    reply = self.session.post(
        lsrh.getTrainingAnswerURL(),
        data=json.dumps(payload),
    )

    # NOTE(review): the response status is not checked before the id is
    # extracted, unlike newCorpus -- confirm whether a check is wanted.
    answer_id = lsrh.getIdFromResponse(reply)
    self.trainingAnswerMap[essay.essay_id] = answer_id