示例#1
0
    def get_questions(self):
        """Return all questions attached to papers that reference this object.

        A ``Question`` is selected when its ``paper_id`` matches a ``Paper``
        whose ``course_id`` equals ``self.id`` (presumably this object is a
        course -- confirm against the enclosing class).

        Returns:
            list: the ``Question`` rows produced by ``Query.all()``.
        """
        Question = get_model("Question")
        Paper = get_model("Paper")

        # Start from the session this instance is attached to.
        query = object_session(self).query(Question)

        # Implicit join: both conditions are AND-ed inside a single filter.
        paper_in_course = Paper.course_id == self.id
        question_on_paper = Question.paper_id == Paper.id

        return query.filter(paper_in_course & question_on_paper).all()
示例#2
0
    def get_questions(self):
        """Fetch every ``Question`` linked, through a ``Paper``, to ``self.id``.

        The link is expressed as two AND-ed filter conditions:
        ``Paper.course_id == self.id`` and ``Question.paper_id == Paper.id``.

        Returns:
            list: all matching ``Question`` objects.
        """
        Question = get_model("Question")
        Paper = get_model("Paper")

        session = object_session(self)
        # Papers are matched on course_id; questions are tied to those papers.
        criteria = (
            (Paper.course_id == self.id) &
            (Question.paper_id == Paper.id)
        )
        matching = session.query(Question).filter(criteria)
        return matching.all()
示例#3
0
    def find_similar_questions(self, question):
        """Score every known question against ``question``.

        The tf-idf matrix is built on demand via ``self.vectorize()`` when
        ``self.vectorizer`` is falsy. The row belonging to ``question`` is
        then compared against all rows of ``self.tfidf_documents`` with
        ``cosine_similarity``.

        NOTE(review): when no entry of ``self.questions`` has the same id as
        ``question``, row 0 is used as the query vector -- confirm that this
        fallback is intended.

        Args:
            question: object exposing an ``id`` attribute.

        Returns:
            list: one ``Similar`` instance per pairing of ``self.questions``
            with the computed scores (the question itself is not excluded).
        """
        # Lazily compute the tf-idf representation.
        if not self.vectorizer:
            self.vectorize()

        # Row of the target question; defaults to 0 when no id matches.
        row = next(
            (position
             for position, candidate in enumerate(self.questions)
             if question.id == candidate.id),
            0,
        )

        # The target question's tf-idf vector.
        target_vector = self.tfidf_documents[row, :]

        # Similarity of every document against the target, flattened so
        # there is a single score per document.
        scores = cosine_similarity(self.tfidf_documents, target_vector).flatten()

        Similar = get_model("Similar")

        # Wrap each (question, score) pair in a Similar object.
        results = []
        for candidate, score in zip(self.questions, scores):
            results.append(
                Similar(question_id=question.id,
                        similar_question_id=candidate.id,
                        similarity=score))
        return results
示例#4
0
    def popular_questions(self):
        """Find the most popular questions.

        Questions are ranked by the sum of their ``Similar.similarity``
        values, restricted to papers whose ``course_id`` equals ``self.id``,
        and capped at the top 25. The ranked list is then de-duplicated: a
        question is dropped when any question already kept lists it among
        its ``similar`` entries.

        Returns:
            list: the kept ``Question`` objects, in descending order of
            cumulative similarity.
        """
        session = object_session(self)
        Similar = get_model("Similar")
        Question = get_model("Question")
        Paper = get_model("Paper")

        # Cumulative similarity per question, roughly:
        #   SELECT question_id, SUM(similarity) AS cum_similarity
        #   FROM <similar table> GROUP BY question_id;
        # NOTE(review): an earlier SQL sketch also filtered on
        # ``similarity > 0.6 AND question_id != similar_question_id``; no such
        # filter is applied here -- confirm whether it should be.
        popular = (session.query(
            Similar.question_id.label("question_id"),
            func.sum(Similar.similarity).label("cum_similarity")).group_by(
                Similar.question_id)).subquery()

        questions = session.query(Question)\
            .join(popular, Question.id == popular.c.question_id)\
            .join(Paper, Paper.id == Question.paper_id)\
            .filter(Paper.course_id == self.id)\
            .order_by(popular.c.cum_similarity.desc())\
            .limit(25)\
            .all()

        # Now we pick only one of a similar group of questions.
        # A graph DB would be handy right about now.
        grouped = []
        for question in questions:
            # The question is a duplicate if ANY already-selected question
            # lists it as similar. (Previously the flag was overwritten on
            # every iteration, so only the last selected question counted.)
            already_covered = any(
                find(selected.similar,
                     lambda s: s.similar_question_id == question.id)
                for selected in grouped
            )

            if not already_covered:
                grouped.append(question)

        return grouped
示例#5
0
    def popular_questions(self):
        """Find the most popular questions.

        Ranks questions by ``sum(similarity)`` over their ``Similar`` rows,
        keeps the top 25 whose paper has ``course_id == self.id``, then
        collapses groups of mutually similar questions so that only the
        highest-ranked member of each group is returned.

        Returns:
            list: ``Question`` objects ordered by descending cumulative
            similarity, with near-duplicates removed.
        """
        session = object_session(self)
        Similar = get_model("Similar")
        Question = get_model("Question")
        Paper = get_model("Paper")

        # Subquery: one row per question_id with its summed similarity.
        # NOTE(review): the SQL this was sketched from additionally had
        # ``WHERE similarity > 0.6 AND question_id != similar_question_id``;
        # the query below does not apply that filter -- confirm intent.
        popular = (session.query(
            Similar.question_id.label("question_id"),
            func.sum(Similar.similarity).label("cum_similarity")
        ).group_by(Similar.question_id)).subquery()

        questions = session.query(Question)\
            .join(popular, Question.id == popular.c.question_id)\
            .join(Paper, Paper.id == Question.paper_id)\
            .filter(Paper.course_id == self.id)\
            .order_by(popular.c.cum_similarity.desc())\
            .limit(25)\
            .all()

        # Now we pick only one of a similar group of questions.
        # A graph DB would be handy right about now.
        grouped = []
        for question in questions:
            # Skip the question as soon as one already-selected question
            # lists it as similar. The old loop reassigned the flag on each
            # pass, so matches against earlier selections were lost.
            is_duplicate = any(
                find(kept.similar,
                     lambda s: s.similar_question_id == question.id)
                for kept in grouped
            )
            if is_duplicate:
                continue
            grouped.append(question)

        return grouped
示例#6
0
    def find_similar_questions(self, question):
        """Build ``Similar`` records relating ``question`` to each known question.

        Calls ``self.vectorize()`` first when ``self.vectorizer`` is falsy,
        then uses ``cosine_similarity`` between all rows of
        ``self.tfidf_documents`` and the row belonging to ``question``.

        NOTE(review): if ``question.id`` matches nothing in
        ``self.questions`` the first row (index 0) is silently used as the
        query -- verify that callers never hit this case.

        Args:
            question: object with an ``id`` attribute.

        Returns:
            list: ``Similar`` objects, one per (question, score) pair; the
            pairing of ``question`` with itself is included.
        """
        # Make sure the tf-idf data exists before using it.
        if not self.vectorizer:
            self.vectorize()

        # Find which row corresponds to the requested question (0 if none).
        target_row = 0
        for row_number, known in enumerate(self.questions):
            if question.id == known.id:
                target_row = row_number
                break

        # Compare every document row against the target row; flatten the
        # result so there is one score per document.
        target = self.tfidf_documents[target_row, :]
        similarity = cosine_similarity(self.tfidf_documents, target).flatten()

        Similar = get_model("Similar")

        # One Similar object per known question, in the same order.
        similar_objects = []
        for known, score in zip(self.questions, similarity):
            record = Similar(
                question_id=question.id,
                similar_question_id=known.id,
                similarity=score,
            )
            similar_objects.append(record)
        return similar_objects