Пример #1
0
    def train(self):
        """Build the per-question reference vectors from the files in ``self.path``.

        Every entry returned by ``listdir(self.path)`` is read through
        ``InputData(self.dataset_type, self.path).readFile``.  For each
        question this stores:

        * ``self.data[question_id]`` -- the text of every reference answer;
        * ``self.model[question_id]`` -- one ``corpus.getVector()`` result per
          reference answer, in the order the reference answers appear.

        Returns:
            None.  Results are written to ``self.data`` and ``self.model``.
        """
        filenames = listdir(self.path)
        reader = InputData(self.dataset_type, self.path)
        # One corpus object is reused for every reference answer; it is reset
        # after each vector has been extracted.
        corpus = plsa.Corpus()

        for filename in filenames:
            question = reader.readFile(filename)
            question_id = question["id"]
            reference_answers = question["referenceAnswers"]
            self.data[question_id] = [ref["text"] for ref in reference_answers]

            vectors = []
            for ref in reference_answers:
                # Baseline = the student answers attached to this reference,
                # followed by the reference text itself.
                baseline = [answer["text"] for answer in ref["studentAnswers"]]
                baseline.append(ref["text"])
                corpus.addBaseline(baseline)
                vectors.append(corpus.getVector())
                corpus.reset()

            self.model[question_id] = vectors
Пример #2
0
    def test(self,mode,inputdir,outputdir):
        """Grade every student answer found in ``inputdir`` and emit the results.

        For each file listed in ``inputdir`` the question is read through
        ``InputData(self.dataset_type, inputdir).readFile``.  The student
        answers nested under each reference answer are processed first,
        followed by ``question["otherStudentAnswers"]``.  Each answer is
        handled by the first rule that applies:

        1. ``self.nonDomain.test(text)`` is true: predicted label is
           "incorrect" for mode 2 or 3, "non_domain" for mode 5; grade "NA".
        2. ``self.contradictBigram`` or ``self.contradict`` reports a
           contradiction: predicted label is "incorrect" for mode 2,
           "contradictory" for mode 3 or 5; grade "NA".
        3. Otherwise the score comes from ``self.modeler.grade`` (replaced by
           -1 when ``self.datamode == "beetle"`` and ``self.irr`` flags the
           answer) and the label from ``self.predict(score)``.

        For rules 1 and 2 any other ``mode`` value leaves the label as "".

        Args:
            mode: Labelling scheme selector; 2, 3 and 5 are the values this
                method distinguishes.  Also stored on ``self.mode``.
            inputdir: Directory whose entries are read as question files.
            outputdir: Passed through to ``output`` together with the rows.

        Returns:
            None.  Each row is printed as it is produced and the accumulated
            rows are handed to ``output``.
        """
        # Field names handed to ``output``; they match the keys of each row.
        head = ["id","grade" ,"Accuracy","Predicted"]
        self.mode = mode

        # Predicted label per mode for the two early-exit rules; a mode that
        # is not listed yields the empty string.
        non_domain_label = {2: "incorrect", 3: "incorrect", 5: "non_domain"}
        contradiction_label = {2: "incorrect", 3: "contradictory", 5: "contradictory"}

        results = []

        def record(answer, predicted, score):
            # Append one result row and echo it to stdout.
            row = {
                "id": answer["id"],
                "Accuracy": answer["accuracy"],
                "Predicted": predicted,
                "grade": score,
            }
            results.append(row)
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print(row)

        filenames = listdir(inputdir)
        reader = InputData(self.dataset_type, inputdir)
        for filename in filenames:
            question = reader.readFile(filename)
            question_id = question["id"]

            # Answers attached to reference answers first, then the rest.
            student_answers = [
                answer
                for ref in question["referenceAnswers"]
                for answer in ref["studentAnswers"]
            ]
            student_answers.extend(question["otherStudentAnswers"])

            for answer in student_answers:
                text = answer["text"]

                if self.nonDomain.test(text):
                    record(answer, non_domain_label.get(mode, ""), "NA")
                    continue

                # The second detector is only consulted when the first one
                # does not fire (short-circuit ``or``).
                if (self.contradictBigram.isContradictory(question_id, text)
                        or self.contradict.isContradictory(
                            self.modeler.getReferences(question_id), text)):
                    record(answer, contradiction_label.get(mode, ""), "NA")
                    continue

                score = self.modeler.grade(question_id, text)
                # NOTE(review): this checks ``self.datamode`` while the reader
                # above is built from ``self.dataset_type`` -- confirm both
                # attributes exist on this class.
                if self.datamode == "beetle":
                    self.irr.build(self.modeler.getReferences(question_id))
                    if self.irr.isIrrelevent(text):
                        score = -1
                record(answer, self.predict(score), score)

            # NOTE(review): called once per input file with the cumulative
            # result list, as in the original code -- confirm whether a
            # single call after the loop was intended.
            output(outputdir, head, results)