def run(self): """ Runs every X seconds, the main run loop """ print "RUNNING THE EVENT COUNT" utils.log("The Event Count starts at: " + str(datetime.now())) course_items = self.get_all_courses().items() self.clean_ec_db(course_items) last_run = self.find_last_run_ingest("EventCount") last_timefinder = self.find_last_run_ingest("TimeFinder") if self.finished_ingestion("TimeFinder") and last_run < last_timefinder: print "STARTING EVENT COUNT" for course_id, course in course_items: print "EVENT COUNTING " + str(course_id) print course_id # Get events from course info courseinfo = course_info.load_course_info(course['coursestructure']) if courseinfo is None: utils.log("Can not find course info for ." + str(course_id)) continue # print courseinfo # Get events events = self.get_events(courseinfo) # Create courseevent table self.create_ec_table(course_id, events) events_date_counts = {} for event_id in events: self.group_event_by_date(course['mongoname'], event_id, events_date_counts) # print events_date_counts # Insert records into database self.insert_ec_table(course_id, events_date_counts) self.sql_ec_conn.commit() self.loop = False utils.log("The Event Count ends at: " + str(datetime.now()))
def datadump2csv(self): """ Generates a CSV file for CRM """ e_tablename = self.ecrm_table print "Exporting CSV: " + e_tablename backup_path = config.EXPORT_PATH current_time = time.strftime('%m%d%Y-%H%M%S') # loop through courses - # write first file with headers then # each subsequent iteration append to file backup_prefix = e_tablename + "_" + current_time backup_file = os.path.join(backup_path, backup_prefix + ".csv") tmp_backup_file = backup_file + ".tmp" for idx, course in enumerate(self.get_all_courses().items()): try: course_id = course[0] mongoname = course[1]['mongoname'] dbname = course[1]['dbname'] # Get nice course name from course info courseinfo = course_info.load_course_info(course[1]['coursestructure']) if courseinfo is None: utils.log("Can not find course info for ." + str(course_id)) continue nice_name = courseinfo['display_name'] start = courseinfo['start'].split('T') start_date = datetime.strptime(start[0].replace('"', ''), "%Y-%m-%d") start_date = start_date.strftime("%d/%m/%Y") # au.last_login, query = "SELECT up.user_id, " \ "CASE au.is_staff " \ "WHEN 1 THEN 'Yes' ELSE 'No' END AS is_staff, " \ "au.is_active, TRIM(TRAILING '.' FROM e.email ) AS email, " \ "pc.viewed, pc.explored, pc.certified, pc.mode, " \ "TRIM(TRAILING '\\\\' FROM REPLACE(REPLACE(substring_index(e.full_name, ' ', 1),'�', ''), ',', '')) AS first_name, " \ "TRIM(TRAILING '\\\\' FROM SUBSTR(TRIM(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTR(e.full_name, LOCATE(' ', e.full_name)), '�', ''), ',', ''), '|', ''), CONVERT(CHAR(127) USING utf8), '')), 1, 30)) AS last_name, " \ "'{2}' AS course_id, " \ "'{3}' AS course_name, " \ "'{5}' AS course_start_date, " \ "DATE_FORMAT(pc.start_time,'%d/%m/%Y') as enrolled, " \ "CASE WHEN DATE_FORMAT(NOW(), '%Y') - up.year_of_birth < 15 THEN 'No' ELSE CASE WHEN e.is_opted_in_for_email = 'True' THEN 'Yes' ELSE 'No' END END AS is_opted_in_for_email, " \ "CASE up.gender WHEN 'm' THEN 'Male' WHEN 'f' THEN 'Female' WHEN 'o' THEN 'Other' ELSE NULL END as gender, " \ "CASE WHEN up.year_of_birth <= 1900 THEN NULL " \ "ELSE up.year_of_birth END AS year_of_birth ," \ "up.level_of_education, " \ "( CASE up.level_of_education " \ "WHEN 'p' THEN 'Doctorate' " \ "WHEN 'a' THEN 'Associate degree' " \ "WHEN 'b' THEN 'Bachelors degree' " \ "WHEN 'm' THEN 'Masters or professional degree' " \ "WHEN 'hs' THEN 'Secondary/high school' " \ "WHEN 'jhs' THEN 'Junior secondary/junior high/middle school' " \ "WHEN 'el' THEN 'Elementary/primary school' " \ "WHEN 'none' THEN 'No Formal Education' " \ "WHEN 'other' THEN 'Other Education' " \ "WHEN '' THEN 'User did not specify level of education' " \ "WHEN 'p_se' THEN 'Doctorate in science or engineering (no longer used)' " \ "WHEN 'p_oth' THEN 'Doctorate in another field (no longer used)' " \ "ELSE 'User did not specify level of education' END ) AS levelofEd, " \ "c.country_name " \ "FROM {0}.auth_user au " \ "JOIN {4}.emailcrm e ON au.email = e.email " \ "JOIN Person_Course.personcourse_{2} pc ON au.id = pc.user_id " \ "JOIN {0}.auth_userprofile up ON au.id = up.user_id " \ "LEFT JOIN {4}.countries_io c ON up.country = c.country_code " if not config.EMAILCRM_FULL_EXPORT: query += "LEFT JOIN {4}.lastexport le " \ "ON le.user_id = up.user_id " \ "AND le.viewed = pc.viewed " \ "AND le.explored = pc.explored " \ "AND le.certified = pc.certified " \ "AND le.course_id = '{2}' " \ "WHERE e.course_id = '{1}' " \ "AND le.user_id is null " \ "AND le.viewed is null " \ "AND le.explored is null " \ "AND le.certified is null " \ "AND le.course_id is null " query = query.format(dbname, mongoname, course_id, nice_name, self.ecrm_db, start_date) ec_cursor = self.sql_ecrm_conn.cursor() ec_cursor.execute(query) result = ec_cursor.fetchall() ec_cursor.close() if idx == 0: with open(tmp_backup_file, "wb") as csv_file: csv_writer = csv.writer(csv_file, dialect='excel', encoding='utf-8') csv_writer.writerow([i[0] for i in ec_cursor.description]) # write headers for row in result: csv_writer.writerow(row) else: with open(tmp_backup_file, "ab") as csv_file: csv_writer = csv.writer(csv_file, dialect='excel', encoding='utf-8') for row in result: csv_writer.writerow(row) utils.log("EmailCRM select for %s appended to file: %s" % (course_id, tmp_backup_file)) except Exception, e: print repr(e) utils.log("EmailCRM FAILED: %s" % (repr(e))) return
def run(self): """ Runs every X seconds, the main run loop """ last_run = self.find_last_run_ingest("PersonCourse") last_timefinder = self.find_last_run_ingest("TimeFinder") last_iptocountry = self.find_last_run_ingest("IpToCountry") last_dbstate = self.find_last_run_ingest("DatabaseState") if self.finished_ingestion("TimeFinder") and \ last_run < last_timefinder and \ self.finished_ingestion("IpToCountry") and \ last_run < last_iptocountry and \ self.finished_ingestion("DatabaseState") and \ last_run < last_dbstate: self.use_sql_databases() course_items = self.get_all_courses().items() # Create 'cf_table' self.create_cf_table() # Clean 'pc_table' self.clean_pc_db(course_items) for course_id, course in course_items: # Get chapters from course info courseinfo = course_info.load_course_info(course['coursestructure']) utils.log('PersonCourse: ' + str(course_id)) if courseinfo is None: utils.log("Can not find course info for ." + str(course_id)) continue cf_item = CFModel(course_id, course['dbname'], course['mongoname'], course['discussiontable']) # Set cf_item course_launch_date bad_start = False course_launch_date = None course_close_date = None if 'start' in courseinfo: try: course_launch_time = dateutil.parser.parse(courseinfo['start'].replace('"', "")) course_launch_date = course_launch_time.date() cf_item.set_course_launch_date(course_launch_date) except Exception: print "ERROR: BAD COURSE CODE START DATE" print courseinfo['start'] print type(courseinfo['start']) bad_start = True else: utils.log("Course not started for " + course_id) continue if bad_start: continue # Set cf_item course_close_date if 'end' in courseinfo: try: course_close_time = dateutil.parser.parse(courseinfo['end'].replace('"', "")) course_close_date = course_close_time.date() cf_item.set_course_close_date(course_close_date) except ValueError: pass # Set cf_item course_length if course_launch_date and course_close_date: date_delta = course_close_date - course_launch_date cf_item.set_course_length(math.ceil(date_delta.days / 7.0)) # Set cf_item nchapters chapters = [] chapters = self.get_chapter(courseinfo, chapters) nchapters = len(chapters) cf_item.set_nchapters(nchapters) half_chapters = math.ceil(float(nchapters) / 2) # Set cf_item nvideos, nhtmls, nassessments, nsummative_assessments, nformative_assessments, nincontent_discussions, nactivities content = self.analysis_chapters(chapters) cf_item.set_nvideos(content['nvideos']) cf_item.set_nhtmls(content['nhtmls']) cf_item.set_nassessments(content['nassessments']) cf_item.set_nsummative_assessments(content['nsummative_assessments']) cf_item.set_nformative_assessments(content['nformative_assessments']) cf_item.set_nincontent_discussions(content['nincontent_discussions']) cf_item.set_nactivities(content['nactivities']) # Create 'pc_table' self.create_pc_table(course_items) # Dict of items of personcourse, key is the user id pc_dict = {} # Select the database self.sql_course_conn.select_db(course['dbname']) course_cursor = self.sql_course_conn.cursor() # course_id for PCModel pc_course_id = course['mongoname'] utils.log("LOADING COURSE {" + pc_course_id + "}") # find all user_id utils.log("{auth_user}") query = "SELECT id, is_staff FROM auth_user" course_cursor.execute(query) result = course_cursor.fetchall() for record in result: pc_dict[record[0]] = PCModel(pc_course_id, record[0]) pc_dict[record[0]].set_roles(record[1]) # The list of user_id user_id_list = pc_dict.keys() user_id_list.sort() # print user_id_list # Set LoE, YoB, gender based on the data in {auth_userprofile} utils.log("{auth_userprofile}") query = "SELECT user_id, year_of_birth, level_of_education, gender FROM auth_userprofile WHERE user_id IN (" + ",".join( ["%s"] * len(user_id_list)) + ")" query = query % tuple(user_id_list) course_cursor.execute(query) result = course_cursor.fetchall() for record in result: user_id = int(record[0]) pc_dict[user_id].set_YoB(record[1]) pc_dict[user_id].set_LoE(record[2]) pc_dict[user_id].set_gender(record[3]) # Set certified based on the data in {certificates_generatedcertificate} utils.log("{certificates_generatedcertificate}") query = "SELECT user_id, grade, status FROM certificates_generatedcertificate WHERE user_id IN (" + ",".join( ["%s"] * len(user_id_list)) + ")" query = query % tuple(user_id_list) course_cursor.execute(query) result = course_cursor.fetchall() for record in result: user_id = int(record[0]) pc_dict[user_id].set_grade(float(record[1])) pc_dict[user_id].set_certified(record[2]) # Set start_time based on the data in {student_courseenrollment} utils.log("{student_courseenrollment}") query = "SELECT user_id, created, mode FROM student_courseenrollment WHERE user_id IN (" + ",".join( ["%s"] * len(user_id_list)) + ")" query = query % tuple(user_id_list) course_cursor.execute(query) result = course_cursor.fetchall() nhonor = 0 naudit = 0 nvertified = 0 registration_open_date = datetime.date.today() for record in result: user_id = int(record[0]) start_time = record[1] # dateutil.parser.parse(record[1]) start_date = start_time.date() pc_dict[user_id].set_start_time(start_date) pc_dict[user_id].set_mode(record[2]) if record[2] == 'honor': nhonor += 1 if record[2] == 'audit': naudit += 1 if record[2] == 'verified': nvertified += 1 if start_date < registration_open_date: registration_open_date = start_date # Set cf_item nhonor_students, naudit_students, nvertified_students, registration_open_date cf_item.set_nhonor_students(nhonor) cf_item.set_naudit_students(naudit) cf_item.set_nvertified_students(nvertified) cf_item.set_registration_open_date(registration_open_date) # Set ndays_act and viewed based on the data in {courseware_studentmodule} try: utils.log("{ndays_act: courseware_studentmodule}") query = "SELECT student_id, COUNT(DISTINCT SUBSTRING(created, 1, 10)) FROM courseware_studentmodule WHERE student_id is not null GROUP BY student_id" course_cursor.execute(query) result = course_cursor.fetchall() for record in result: user_id = int(record[0]) if user_id in pc_dict: pc_dict[user_id].set_ndays_act(record[1]) if record[1] > 0: pc_dict[user_id].set_viewed(1) else: utils.log("Student id: %s does not exist in {auth_user}." % user_id) except self.sql_pc_conn.ProgrammingError: utils.log("Couldnt find courseware_studentmodule for " + course_id) continue except: utils.log("Unexpected error:" + sys.exc_info()[0]) # Set attempted problems utils.log("{attempted_problems: courseware_studentmodule}") query = "SELECT student_id, COUNT(state) FROM courseware_studentmodule WHERE state LIKE '%correct_map%' AND student_id IS NOT NULL GROUP BY student_id" course_cursor.execute(query) result = course_cursor.fetchall() for record in result: user_id = int(record[0]) if user_id in pc_dict: pc_dict[user_id].set_attempted_problems(record[1]) else: utils.log("Student id: %s does not exist in {auth_user}." % user_id) # Set nplay_video based on the data in {courseware_studentmodule} utils.log("{nplay_video: courseware_studentmodule}") query = "SELECT student_id, COUNT(*) FROM courseware_studentmodule WHERE module_type = 'video' AND student_id IS NOT NULL GROUP BY student_id" course_cursor.execute(query) result = course_cursor.fetchall() for record in result: user_id = int(record[0]) if user_id in pc_dict: pc_dict[user_id].set_nplay_video(record[1]) # Set nchapters and explored based on the data in {courseware_studentmodule} utils.log("{nchapters: courseware_studentmodule}") query = "SELECT student_id, COUNT(DISTINCT module_id) FROM courseware_studentmodule WHERE module_type = 'chapter' AND student_id IS NOT NULL GROUP BY student_id" course_cursor.execute(query) result = course_cursor.fetchall() for record in result: user_id = int(record[0]) if user_id in pc_dict: pc_dict[user_id].set_nchapters(record[1]) if record[1] >= half_chapters: pc_dict[user_id].set_explored(1) else: pc_dict[user_id].set_explored(0) # Mongo # Discussion forum utils.log("{discussion_forum}") self.mongo_dbname = "discussion_forum" self.mongo_collectionname = course['discussiontable'] self.connect_to_mongo(self.mongo_dbname, self.mongo_collectionname) # Change call for new API code Tim Cavanagh 05/01/2016 # pipeline = [{"$group": {"_id": "$author_id", "postSum": {"$sum": 1}}}] # # user_posts = self.mongo_collection.aggregate(pipeline, allowDiskUse=True) user_posts = self.mongo_collection.aggregate([ # {"$match": {"author_id": {"$in": user_id_list}}}, {"$group": {"_id": "$author_id", "postSum": {"$sum": 1}}} ]) # ['result'] for item in user_posts: if "_id" in item and item["_id"] is not None: user_id = int(item["_id"]) if user_id in pc_dict: pc_dict[user_id].set_nforum_posts(item['postSum']) else: utils.log("Author id: %s does not exist in %s {auth_user}." % ( user_id, self.mongo_collectionname)) else: utils.log( "Author id: %s does not exist in %s {auth_user}." % (user_id, self.mongo_collectionname)) # Tracking logs utils.log("{logs}") self.mongo_dbname = "logs" self.mongo_collectionname = "clickstream" self.connect_to_mongo(self.mongo_dbname, self.mongo_collectionname) user_events = self.mongo_collection.aggregate([ {"$match": {"context.course_id": pc_course_id}}, # {"$sort": {"time": 1}}, {"$group": {"_id": "$context.user_id", "countrySet": {"$addToSet": "$country"}, "eventSum": {"$sum": 1}, "last_event": {"$last": "$time"}}} ], allowDiskUse=True, batchSize=100, useCursor=False) # ['result'] batchSize=100, , useCursor=False # if 'result' in user_events: # user_events = user_events['result'] for item in user_events: try: user_id = item["_id"] if user_id in pc_dict: pc_dict[user_id].set_last_event(item["last_event"][:len(u"YYYY-MM-DD")]) pc_dict[user_id].set_nevents(item["eventSum"]) pc_dict[user_id].set_final_cc_cname(item["countrySet"]) else: utils.log("Context.user_id: %s does not exist in %s {auth_user}." % (user_id, pc_course_id)) except TypeError as err: print "error %s item %s" % (err.message, item) # Set cf_item nregistered_students, nviewed_students, nexplored_students, ncertified_students nregistered_students = sum(pc_item.registered for pc_item in pc_dict.values()) nviewed_students = sum(pc_item.viewed for pc_item in pc_dict.values()) nexplored_students = sum(pc_item.explored for pc_item in pc_dict.values()) ncertified_students = sum(pc_item.certified for pc_item in pc_dict.values()) cf_item.set_nregistered_students(nregistered_students) cf_item.set_nviewed_students(nviewed_students) cf_item.set_nexplored_students(nexplored_students) cf_item.set_ncertified_students(ncertified_students) pc_cursor = self.sql_pc_conn.cursor() # print cf_item cf_item.save2db(pc_cursor, self.cf_table) # Till now, data preparation for pc_tablex has been finished. # Check consistent then write them into the database. utils.log("save to {personcourse}") tablename = self.pc_table + "_" + course_id for user_id, user_data in pc_dict.items(): pc_dict[user_id].set_inconsistent_flag() pc_dict[user_id].save2db(pc_cursor, tablename) self.sql_pc_conn.commit() self.datadump2csv(course_items) self.save_run_ingest() utils.log("Person course completed")