Пример #1
0
    def clean_review(review):
        """Turn one raw review dict into cleaned course/professor reviews.

        Args:
            review: Dict describing a single review. The keys 'class',
                'comment' and 'date' are read unconditionally; the rating
                keys 'r_clarity', 'r_easy' and 'r_interest' are optional.

        Returns:
            An empty dict when review['class'] is None or does not contain
            exactly one recognizable course code. Otherwise a dict with the
            keys 'professor_id', 'course_id', 'course_review' (an
            m.CourseReview) and 'professor_review' (an m.ProfessorReview).

        Raises:
            KeyError: If 'class', 'comment' or 'date' is missing.
            ValueError: If review['date'] is not in '%m/%d/%y' format.

        NOTE(review): the professor name is read from `data`, which is not
        a parameter of this function; presumably it is supplied by the
        enclosing scope -- confirm against the caller.
        """
        course = review['class']
        if course is None:
            return {}

        course = course.lower()
        matches = re.findall(r'([a-z]+).*?([0-9]{3}[a-z]?)(?:[^0-9]|$)',
                             course)
        # TODO(mack): investigate if we are missing any good courses with
        # this regex
        # Anything other than exactly one course code is ambiguous, so the
        # review is skipped.
        if len(matches) != 1:
            return {}

        # The text was lowercased before matching, so both groups already
        # are lowercase.
        department_id, course_number = matches[0]
        course_id = department_id + course_number
        prof_name = get_prof_name(data['prof_name'])
        prof_id = m.Professor.get_id_from_name(prof_name['first_name'],
                                               prof_name['last_name'])

        cleaned = {
            'professor_id': prof_id,
            'course_id': course_id,
            'course_review': m.CourseReview(),
            'professor_review': m.ProfessorReview(),
        }

        def normalize_rating(menlo_rating):
            # Normalize 1..5 to Yes/No:
            # 1,2 => 0 (No), 3 => None, 4,5 => 1 (Yes)
            try:
                rating = int(menlo_rating)
            except (TypeError, ValueError, OverflowError):
                # Missing or non-numeric ratings count as "no rating".
                return None
            if rating <= 2:
                return 0
            if rating >= 4:
                return 1
            return None

        # TODO(mack): include 'r_helpful'?
        if 'r_clarity' in review:
            cleaned['professor_review'].clarity = \
                normalize_rating(review['r_clarity'])
        if 'r_easy' in review:
            cleaned['course_review'].easiness = \
                normalize_rating(review['r_easy'])
        if 'r_interest' in review:
            cleaned['course_review'].interest = \
                normalize_rating(review['r_interest'])

        cleaned['professor_review'].comment = review['comment']
        cleaned['professor_review'].comment_date = datetime.strptime(
            review['date'], '%m/%d/%y')

        return cleaned
Пример #2
0
def generic_stats():
    """Gather site-wide usage counters into one dict.

    Returns:
        A dict holding counts of users, imported transcripts/schedules,
        ratings, reviews, user-courses and courses, the value returned by
        users_joined_after() for the last 24 hours, and two timestamps:
        'num_signups_start_time' (start of that 24-hour window) and
        'epoch' (when the result dict was built).
    """
    def count_rated(prefix, fields):
        # Add up, over every rating field, how many user-courses have that
        # field set to something other than None.
        return sum(
            m.UserCourse.objects(
                **{'%s__%s__ne' % (prefix, field): None}).count()
            for field in fields)

    total_user_courses = m.UserCourse.objects().count()

    total_users = m.User.objects.count()
    users_with_transcript = m.User.objects(
        transcripts_imported__gt=0).count()
    users_with_schedule = m.User.objects(
        schedules_imported__gt=0).count()

    course_review_count = m.UserCourse.objects(
        course_review__comment__ne='').count()
    professor_review_count = m.UserCourse.objects(
        professor_review__comment__ne='').count()

    total_courses = m.Course.objects.count()

    # TODO(david): Make rating_fields a class method
    course_rating_count = count_rated(
        'course_review', m.CourseReview().rating_fields())
    professor_rating_count = count_rated(
        'professor_review', m.ProfessorReview().rating_fields())

    # A user-course counts as "rated or reviewed" when any rating field is
    # set or either comment is non-empty.
    clauses = [
        Q(**{'course_review__%s__ne' % field: None})
        for field in m.CourseReview().rating_fields()
    ]
    clauses.extend(
        Q(**{'professor_review__%s__ne' % field: None})
        for field in m.ProfessorReview().rating_fields())
    clauses.append(Q(course_review__comment__ne=''))
    clauses.append(Q(professor_review__comment__ne=''))

    any_feedback = Q()
    for clause in clauses:
        any_feedback |= clause
    feedback_ucs = m.UserCourse.objects.filter(any_feedback)
    feedback_uc_count = feedback_ucs.count()

    # Distinct courses that received at least one rating or review.
    # TODO(mduan): Verify that every id here belongs to an existing course
    feedback_course_ids = set(feedback_ucs.distinct('course_id'))

    # Distinct users that left at least one rating or review.
    # TODO(mduan): Verify that every id here belongs to an existing user
    feedback_user_ids = set(feedback_ucs.distinct('user_id'))

    window_start = datetime.now() - timedelta(hours=24)
    recent_signups = users_joined_after(window_start)

    return {
        'num_users': total_users,
        'num_signups_today': recent_signups,
        'num_users_with_transcript': users_with_transcript,
        'num_users_with_schedule': users_with_schedule,
        'num_ratings': course_rating_count + professor_rating_count,
        'num_reviews': course_review_count + professor_review_count,
        'num_ucs': total_user_courses,
        'num_ucs_rated_reviewed': feedback_uc_count,
        'num_courses': total_courses,
        'num_courses_rated_reviewed': len(feedback_course_ids),
        'num_users_rated_reviewed': len(feedback_user_ids),
        'num_signups_start_time': window_start,
        'epoch': datetime.now(),
    }