Пример #1
0
    def addAnswer(self, text):
        data = { "author": lsrh.getAuthorURL(self.author_id),
                 "prompt": lsrh.getPromptURL(self.prompt_id),
                 "text": text }

        r = self.session.post(lsrh.getAnswerURL(), data=json.dumps(data))
        lsrh.checkStatus(r, (201,), "Failed to upload answer.")
Пример #2
0
    def setAnswerSet(self, answer_set_id):
        r = self.session.get(lsrh.getAnswerSetURL(answer_set_id))
        try:
            lsrh.checkStatus(r, (200,), "Answer set doesn't exist")
        except:
            return

        self.answer_set_id = answer_set_id
Пример #3
0
    def setCorpus(self, corpus_id):
        r = self.session.get(lsrh.getCorpusURL(corpus_id))
        lsrh.checkStatus(r, (200,), "Corpus id not found on remote server.")

        response = r.json()

        self.prompt_id = lsrh.getIdFromURL(response["prompt"])
        self.corpus_id = corpus_id
        print 'Set corpus to id {} for prompt {}'.format(self.corpus_id, self.prompt_id)
Пример #4
0
    def addAnswerSet(self, trained_models = [], tags = []):
        data = { "prompt": lsrh.getPromptURL(self.prompt_id),
                 "trained_models": [lsrh.getTrainedModelURL(t) for t in trained_models] if len(trained_models) > 0 else [], 
                 "tags": tags }

        r = self.session.post(lsrh.getAnswerSetURL(), data=json.dumps(data))
        lsrh.checkStatus(r, (201,), "Failed to create answer set.")

        self.answer_set_id = lsrh.getIdFromResponse(r)
Пример #5
0
    def newCorpus(self, prompt_id):
        description = raw_input("Description of corpus? ")
        data = {"prompt": lsrh.getPromptURL(prompt_id), "description": description}
        r = self.session.post(lsrh.getCorpusURL(), data=json.dumps(data), headers=self.headers)
        lsrh.checkStatus(r, (201,), "Prompt id not found on remote server, or description was illegal.")

        self.prompt_id = prompt_id
        self.corpus_id = lsrh.getIdFromResponse(r)

        print 'New corpus with id {} for prompt {}'.format(self.corpus_id, self.prompt_id)
Пример #6
0
    def runPredictions(self):
        ''' Returns list of urls of prediction results. '''
        data = { "answer_set": lsrh.getAnswerSetURL(self.answer_set_id) }
        r = self.session.post(lsrh.getPredictionTaskURL(), data=json.dumps(data))

        lsrh.checkStatus(r, (201,), "Failed to create prediction task")

        predictionTaskData = r.json()
        r2 = self.session.post(predictionTaskData["process"])

        r3 = self.waitForTask(predictionTaskData["url"], "prediction task")

        return r3.json()["prediction_results"]
Пример #7
0
    def trainModel(self):
        ''' Train model based on the training answers for the current corpus.
            Returns url of trained_model. '''
        # create and start training task
        data = {"corpus": lsrh.getCorpusURL(self.corpus_id)}
        r = self.session.post(lsrh.getTrainingTaskURL(), data=json.dumps(data))
        lsrh.checkStatus(r, (201,), "Failed to create training task")

        trainingTaskData = r.json()
        r2 = self.session.post(trainingTaskData["process"])

        r3 = self.waitForTask(trainingTaskData["url"], "training task")

        return r3.json()["trained_model"]
Пример #8
0
 def waitForTask(self, url, taskName, interval=20):
     ''' Block until task finishes; check every [interval] seconds.
         Return request response. '''
     while True:
         time.sleep(interval)
         r = self.session.get(url)
         lsrh.checkStatus(r, (200,), "Couldn't access {}...".format(taskName))
         uploadStatus = r.json()["status"]
         # MAGIC VALUES: Lightside API uses 'S' for success, 'F' for failure, 'W' for "task hasn't yet been submitted"; everything else is queueing/waiting
         if uploadStatus == 'S':
             print ''
             return r
         elif uploadStatus == 'F' or uploadStatus == 'U':
             raise ValueError("{} failed!".format(taskName.capitalize()))
         elif uploadStatus == 'W':
             raise ValueError("{} hasn't been submitted for processing!".format(taskName.capitalize()))
         else:
             print '.',
             sys.stdout.flush()
Пример #9
0
    def uploadAnswerSet(self, fname):
        if self.answer_set_id == None:
            self.addAnswerSet()

        r = self.uploadToS3('answerset', fname)

        soup = bsoup(r.content)
        key = soup.find('key').get_text()

        data2 = {'prompt': lsrh.getPromptURL(self.prompt_id), 'answer_set': lsrh.getAnswerSetURL(self.answer_set_id), 's3_key': key, 'content_type': 'text/csv'}

        # create and start upload task
        r2 = self.session.post(lsrh.getAnswerSetUploadTaskURL(), data=json.dumps(data2))
        answerUploadTaskData = r2.json()
        r3 = self.session.post(answerUploadTaskData["process"])

        lsrh.checkStatus(r3, (200, 202), "Queueing of answerset upload task failed.")

        r4 = self.waitForTask(answerUploadTaskData["url"], "answer set upload task")
        return r4.json()["answer_set"]
Пример #10
0
    def uploadCorpus(self, prompt_id, fname):
        ''' Create a new corpus of training answers, given the prompt_id and a file containing
            the corpus. '''
        self.newCorpus(prompt_id)

        r = self.uploadToS3('corpus', fname)

        soup = bsoup(r.content)
        key = soup.find('key').get_text()

        data2 = {'corpus': lsrh.getCorpusURL(self.corpus_id), 's3_key': key, 'content_type': 'text/csv'}

        # create and start upload task
        r2 = self.session.post(lsrh.getCorpusUploadTaskURL(), data=json.dumps(data2))
        corpusUploadTaskData = r2.json()
        r3 = self.session.post(corpusUploadTaskData["process"])

        lsrh.checkStatus(r3, (200, 202), "Queueing of corpus upload task failed.")

        r4 = self.waitForTask(corpusUploadTaskData["url"], "corpus upload task")
        return r3.json()["corpus"]
Пример #11
0
    def uploadToS3(self, uploadType, fname):
        ''' Upload file to S3 and return the response. '''
        # get upload params
        if uploadType == "corpus":
            r = self.session.get(lsrh.getCorpusUploadParametersURL())
        elif uploadType == "answerset":
            r = self.session.get(lsrh.getAnswerSetUploadParametersURL())
        else:
            raise ValueError("Must upload either a corpus or answerset to s3")

        lsrh.checkStatus(r, (200,), "Request to get {} upload parameters failed.".format(uploadType))

        params = r.json()
        data2 = { 'AWSAccessKeyId': params['access_key_id'], 'key': params['key'], 'acl': 'public-read',
                'Policy': params['policy'], 'Signature': params['signature'], 'success_action_status': '201' }

        with open(fname, 'rb') as uploadfile:
            files2 = {'file': uploadfile}
            # upload to s3
            r2 = requests.post(params['s3_endpoint'], data=data2, files=files2)
            lsrh.checkStatus(r2, (201,), "Request to S3 failed.")

            return r2
Пример #12
0
    def getPrompt(self, prompt_id):
        r = self.session.get(lsrh.getPromptURL(prompt_id))
        lsrh.checkStatus(r, (201,), "Prompt id not found on remote server")

        return r