示例#1
0
def group_ctecs_by_course_id():
	"""Group merged course/CTEC records by their course id.

	Each document returned by ``ctecs.find()`` is combined with the document
	from ``courses`` that has the same ``_id``. Where both documents define
	the same key, the CTEC value wins. The merged record's ``_id`` key is
	renamed to ``id``.

	Returns:
		A ``defaultdict(list)`` mapping ``str(course_id)`` to the list of
		merged records for that course.

	Raises:
		TypeError: if ``courses.find_one`` yields ``None`` for a CTEC
			(``dict(None)`` is not allowed).
	"""
	by_course = defaultdict(list)
	for evaluation in ctecs.find():
		matching_course = courses.find_one({"_id": evaluation["_id"]})
		# Start from a copy of the course so the CTEC fields can overlay it.
		merged = dict(matching_course)
		merged.update(evaluation)
		merged['id'] = merged.pop('_id')
		by_course[str(merged['course_id'])].append(merged)
	return by_course
示例#2
0
from textblob import TextBlob
from models import ctecs

# Phrases whose occurrences in an essay are counted as "easy" / "hard" signals.
easy_words = [
	"easy",
	"stress free",
	"painless",
	"little work",
	"no work",
	"breeze",
]
hard_words = [
	"hard",
	"challenging",
	"difficult",
]

# Annotate every CTEC document with keyword counts and its adjectives,
# then save the document back and print its _id.
for ctec in ctecs.find():
	essay = ctec['essay']
	# NOTE(review): str.count is a case-sensitive substring count, so "hard"
	# is also counted inside "hardly" and "Easy" is not counted.
	ctec['easiness'] = sum(essay.count(phrase) for phrase in easy_words)
	ctec['hardness'] = sum(essay.count(phrase) for phrase in hard_words)
	# Slashes are turned into spaces before the text is tagged.
	tagged = TextBlob(essay.replace("/", " ")).tags
	# Keep every token whose tag contains "JJ".
	adjectives = [token for token, pos in tagged if "JJ" in pos]
	ctec['adjectives'] = " ".join(adjectives)
	ctecs.save(ctec)
	print(ctec['_id'])
示例#3
0
from models import ctecs, courses, terms

# For every CTEC that has a 'corrected_essay' field, print a one-line header,
# the text stored under 'essay', and a divider line.
corrected_query = {'corrected_essay': {'$exists': True}}
for ctec in ctecs.find(corrected_query):
	# Keep only the first whitespace-separated token of the subject.
	ctec['subj'] = ctec['subj'].split()[0]
	header = unicode("[{academic_term}] - {subj} {class_title}").format(**ctec)
	print(header)
	print(ctec['essay'])
	print("-----------------------------")

# from utils import group_ctecs_by_course_id
# grouped_course_ctecs = group_ctecs_by_course_id()

# for course_id, course_ctecs in grouped_course_ctecs.iteritems():
# 	with open("temp/%s.txt" % course_id, "w") as f:
# 		for course_ctec in course_ctecs:
# 			f.write(course_ctec['title'])
# 			f.write("\n")
# 			f.write(course_ctec['essay'].encode('utf-8'))
# 			f.write("\n\n")
示例#4
0
from models import ctecs, courses
import csv

# CSV columns sourced from the course document ("id", "year" and "quarter"
# are derived inside the loop below).
course_fieldnames = ["id", "term", "year", "quarter", "course_id", "class_num", "school", "subject", "catalog_num", "section", "title", "instructor", "start_time", "end_time", "meeting_days"]
# CSV columns sourced from the CTEC document.
ctec_fieldnames = ["enrollment_count", "response_count", "question0_average_rating", "question1_average_rating", "question2_average_rating", "question3_average_rating", "question4_average_rating", "easiness", "hardness", "essay"]
fieldnames = course_fieldnames + ctec_fieldnames

# Write one CSV row per CTEC, merged with the course that shares its _id.
# Python 2's csv module expects a binary-mode file; text mode yields
# "\r\r\n" row endings on Windows.
with open("ctecs.csv", "wb") as f:
	# extrasaction='ignore' lets the writer drop keys that are not columns,
	# so no per-row filtering against the fieldnames list is needed.
	writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
	writer.writeheader()
	for ctec in ctecs.find():
		course = courses.find_one({"_id": ctec["_id"]})
		# CTEC values override course values on key collisions.
		course_ctec = dict(course)
		course_ctec.update(ctec)
		course_ctec['id'] = course_ctec.pop('_id')
		# 'term' is split on whitespace: first token -> year, second -> quarter.
		term_parts = course_ctec['term'].split()
		course_ctec['year'] = term_parts[0]
		course_ctec['quarter'] = term_parts[1]
		# The Python 2 csv writer cannot emit non-ASCII unicode objects, so
		# encode every unicode value (not just the essay) to UTF-8 bytes.
		for key, value in course_ctec.items():
			if isinstance(value, unicode):
				course_ctec[key] = value.encode('utf-8')
		writer.writerow(course_ctec)

		# print course_ctec['id'], course_ctec['term'], course_ctec['catalog_num'], course_ctec['instructor']
示例#5
0
from models import ctecs, courses, terms

# For every CTEC that has a 'corrected_essay' field, print a one-line header,
# the text stored under 'essay', and a divider line.
corrected_query = {'corrected_essay': {'$exists': True}}
for ctec in ctecs.find(corrected_query):
    # Keep only the first whitespace-separated token of the subject.
    ctec['subj'] = ctec['subj'].split()[0]
    header = unicode("[{academic_term}] - {subj} {class_title}").format(**ctec)
    print(header)
    print(ctec['essay'])
    print("-----------------------------")

# from utils import group_ctecs_by_course_id
# grouped_course_ctecs = group_ctecs_by_course_id()

# for course_id, course_ctecs in grouped_course_ctecs.iteritems():
# 	with open("temp/%s.txt" % course_id, "w") as f:
# 		for course_ctec in course_ctecs:
# 			f.write(course_ctec['title'])
# 			f.write("\n")
# 			f.write(course_ctec['essay'].encode('utf-8'))
# 			f.write("\n\n")