示例#1
0
    def run(self):
        """
        Runs every X seconds, the main run loop
        """
        print "RUNNING THE EVENT COUNT"
        utils.log("The Event Count starts at: " + str(datetime.now()))

        course_items = self.get_all_courses().items()
        self.clean_ec_db(course_items)

        last_run = self.find_last_run_ingest("EventCount")
        last_timefinder = self.find_last_run_ingest("TimeFinder")

        if self.finished_ingestion("TimeFinder") and last_run < last_timefinder:
            print "STARTING EVENT COUNT"
            for course_id, course in course_items:
                print "EVENT COUNTING " + str(course_id)
                print course_id

                # Get events from course info
                courseinfo = course_info.load_course_info(course['coursestructure'])
                if courseinfo is None:
                    utils.log("Can not find course info for ." + str(course_id))
                    continue

                # print courseinfo

                # Get events
                events = self.get_events(courseinfo)

                # Create courseevent table
                self.create_ec_table(course_id, events)

                events_date_counts = {}

                for event_id in events:
                    self.group_event_by_date(course['mongoname'], event_id, events_date_counts)

                # print events_date_counts

                # Insert records into database
                self.insert_ec_table(course_id, events_date_counts)

                self.sql_ec_conn.commit()

            self.loop = False
            utils.log("The Event Count ends at: " + str(datetime.now()))
示例#2
0
    def datadump2csv(self):
        """
        Generates a CSV file for CRM
        """
        e_tablename = self.ecrm_table

        print "Exporting CSV: " + e_tablename

        backup_path = config.EXPORT_PATH
        current_time = time.strftime('%m%d%Y-%H%M%S')

        # loop through courses -
        # write first file with headers then
        # each subsequent iteration append to file

        backup_prefix = e_tablename + "_" + current_time
        backup_file = os.path.join(backup_path, backup_prefix + ".csv")
        tmp_backup_file = backup_file + ".tmp"

        for idx, course in enumerate(self.get_all_courses().items()):
            try:
                course_id = course[0]
                mongoname = course[1]['mongoname']
                dbname = course[1]['dbname']

                # Get nice course name from course info
                courseinfo = course_info.load_course_info(course[1]['coursestructure'])
                if courseinfo is None:
                    utils.log("Can not find course info for ." + str(course_id))
                    continue
                nice_name = courseinfo['display_name']
                start = courseinfo['start'].split('T')
                start_date = datetime.strptime(start[0].replace('"', ''), "%Y-%m-%d")
                start_date = start_date.strftime("%d/%m/%Y")

                # au.last_login,

                query = "SELECT up.user_id, " \
                        "CASE au.is_staff " \
                        "WHEN 1 THEN 'Yes' ELSE 'No' END AS is_staff, " \
                        "au.is_active, TRIM(TRAILING '.' FROM e.email ) AS email, " \
                        "pc.viewed, pc.explored, pc.certified, pc.mode, " \
                        "TRIM(TRAILING '\\\\' FROM REPLACE(REPLACE(substring_index(e.full_name, ' ', 1),'�', ''), ',', '')) AS first_name, " \
                        "TRIM(TRAILING '\\\\' FROM SUBSTR(TRIM(REPLACE(REPLACE(REPLACE(REPLACE(SUBSTR(e.full_name, LOCATE(' ', e.full_name)), '�', ''), ',', ''), '|', ''), CONVERT(CHAR(127) USING utf8), '')), 1, 30)) AS last_name, " \
                        "'{2}' AS course_id, " \
                        "'{3}' AS course_name, " \
                        "'{5}' AS course_start_date, " \
                        "DATE_FORMAT(pc.start_time,'%d/%m/%Y') as enrolled, " \
                        "CASE WHEN DATE_FORMAT(NOW(), '%Y') - up.year_of_birth < 15 THEN 'No' ELSE CASE WHEN e.is_opted_in_for_email = 'True' THEN 'Yes' ELSE 'No' END END AS is_opted_in_for_email, " \
                        "CASE up.gender WHEN 'm' THEN 'Male'  WHEN 'f' THEN 'Female' WHEN 'o' THEN 'Other' ELSE NULL END as gender, " \
                        "CASE WHEN up.year_of_birth <= 1900 THEN NULL " \
                        "ELSE up.year_of_birth END AS year_of_birth ," \
                        "up.level_of_education, " \
                        "( CASE up.level_of_education " \
                        "WHEN 'p' THEN 'Doctorate' " \
                        "WHEN 'a' THEN 'Associate degree' " \
                        "WHEN 'b' THEN 'Bachelors degree' " \
                        "WHEN 'm' THEN 'Masters or professional degree' " \
                        "WHEN 'hs' THEN 'Secondary/high school' " \
                        "WHEN 'jhs' THEN 'Junior secondary/junior high/middle school' " \
                        "WHEN 'el' THEN 'Elementary/primary school' " \
                        "WHEN 'none' THEN 'No Formal Education' " \
                        "WHEN 'other' THEN 'Other Education' " \
                        "WHEN '' THEN 'User did not specify level of education' " \
                        "WHEN 'p_se' THEN 'Doctorate in science or engineering (no longer used)' " \
                        "WHEN 'p_oth' THEN 'Doctorate in another field (no longer used)' " \
                        "ELSE 'User did not specify level of education' END ) AS levelofEd, " \
                        "c.country_name " \
                        "FROM {0}.auth_user au " \
                        "JOIN {4}.emailcrm e ON au.email = e.email " \
                        "JOIN Person_Course.personcourse_{2} pc ON au.id = pc.user_id " \
                        "JOIN {0}.auth_userprofile up ON au.id = up.user_id " \
                        "LEFT JOIN {4}.countries_io c ON up.country = c.country_code "

                if not config.EMAILCRM_FULL_EXPORT:
                    query += "LEFT JOIN {4}.lastexport le " \
                        "ON le.user_id = up.user_id " \
                        "AND le.viewed = pc.viewed  " \
                        "AND le.explored = pc.explored " \
                        "AND le.certified = pc.certified " \
                        "AND le.course_id = '{2}' " \
                        "WHERE e.course_id = '{1}' " \
                        "AND le.user_id is null " \
                        "AND le.viewed is null " \
                        "AND le.explored is null " \
                        "AND le.certified is null " \
                        "AND le.course_id is null "

                query = query.format(dbname, mongoname, course_id, nice_name, self.ecrm_db, start_date)

                ec_cursor = self.sql_ecrm_conn.cursor()
                ec_cursor.execute(query)
                result = ec_cursor.fetchall()
                ec_cursor.close()

                if idx == 0:
                    with open(tmp_backup_file, "wb") as csv_file:
                        csv_writer = csv.writer(csv_file, dialect='excel', encoding='utf-8')
                        csv_writer.writerow([i[0] for i in ec_cursor.description])  # write headers
                        for row in result:
                            csv_writer.writerow(row)
                else:
                    with open(tmp_backup_file, "ab") as csv_file:
                        csv_writer = csv.writer(csv_file, dialect='excel', encoding='utf-8')
                        for row in result:
                            csv_writer.writerow(row)

                utils.log("EmailCRM select for %s appended to file: %s" % (course_id, tmp_backup_file))
            except Exception, e:
                print repr(e)
                utils.log("EmailCRM FAILED: %s" % (repr(e)))
                return
示例#3
0
    def run(self):
        """
        Runs every X seconds, the main run loop
        """
        last_run = self.find_last_run_ingest("PersonCourse")
        last_timefinder = self.find_last_run_ingest("TimeFinder")
        last_iptocountry = self.find_last_run_ingest("IpToCountry")
        last_dbstate = self.find_last_run_ingest("DatabaseState")

        if self.finished_ingestion("TimeFinder") and \
                        last_run < last_timefinder and \
                self.finished_ingestion("IpToCountry") and \
                        last_run < last_iptocountry and \
                self.finished_ingestion("DatabaseState") and \
                        last_run < last_dbstate:
            self.use_sql_databases()

            course_items = self.get_all_courses().items()

            # Create 'cf_table'
            self.create_cf_table()
            # Clean 'pc_table'
            self.clean_pc_db(course_items)

            for course_id, course in course_items:

                # Get chapters from course info
                courseinfo = course_info.load_course_info(course['coursestructure'])
                utils.log('PersonCourse: ' + str(course_id))
                if courseinfo is None:
                    utils.log("Can not find course info for ." + str(course_id))
                    continue

                cf_item = CFModel(course_id, course['dbname'], course['mongoname'], course['discussiontable'])
                # Set cf_item course_launch_date
                bad_start = False
                course_launch_date = None
                course_close_date = None
                if 'start' in courseinfo:
                    try:
                        course_launch_time = dateutil.parser.parse(courseinfo['start'].replace('"', ""))
                        course_launch_date = course_launch_time.date()
                        cf_item.set_course_launch_date(course_launch_date)
                    except Exception:
                        print "ERROR: BAD COURSE CODE START DATE"
                        print courseinfo['start']
                        print type(courseinfo['start'])
                        bad_start = True
                else:
                    utils.log("Course not started for " + course_id)
                    continue
                if bad_start:
                    continue
                # Set cf_item course_close_date
                if 'end' in courseinfo:
                    try:
                        course_close_time = dateutil.parser.parse(courseinfo['end'].replace('"', ""))
                        course_close_date = course_close_time.date()
                        cf_item.set_course_close_date(course_close_date)
                    except ValueError:
                        pass
                # Set cf_item course_length
                if course_launch_date and course_close_date:
                    date_delta = course_close_date - course_launch_date
                    cf_item.set_course_length(math.ceil(date_delta.days / 7.0))

                # Set cf_item nchapters
                chapters = []
                chapters = self.get_chapter(courseinfo, chapters)
                nchapters = len(chapters)
                cf_item.set_nchapters(nchapters)
                half_chapters = math.ceil(float(nchapters) / 2)

                # Set cf_item nvideos, nhtmls, nassessments, nsummative_assessments, nformative_assessments, nincontent_discussions, nactivities
                content = self.analysis_chapters(chapters)
                cf_item.set_nvideos(content['nvideos'])
                cf_item.set_nhtmls(content['nhtmls'])
                cf_item.set_nassessments(content['nassessments'])
                cf_item.set_nsummative_assessments(content['nsummative_assessments'])
                cf_item.set_nformative_assessments(content['nformative_assessments'])
                cf_item.set_nincontent_discussions(content['nincontent_discussions'])
                cf_item.set_nactivities(content['nactivities'])

                # Create 'pc_table'
                self.create_pc_table(course_items)

                # Dict of items of personcourse, key is the user id
                pc_dict = {}

                # Select the database
                self.sql_course_conn.select_db(course['dbname'])
                course_cursor = self.sql_course_conn.cursor()

                # course_id for PCModel
                pc_course_id = course['mongoname']

                utils.log("LOADING COURSE {" + pc_course_id + "}")

                # find all user_id
                utils.log("{auth_user}")
                query = "SELECT id, is_staff FROM auth_user"
                course_cursor.execute(query)
                result = course_cursor.fetchall()
                for record in result:
                    pc_dict[record[0]] = PCModel(pc_course_id, record[0])
                    pc_dict[record[0]].set_roles(record[1])

                # The list of user_id
                user_id_list = pc_dict.keys()
                user_id_list.sort()
                # print user_id_list

                # Set LoE, YoB, gender based on the data in {auth_userprofile}
                utils.log("{auth_userprofile}")
                query = "SELECT user_id, year_of_birth, level_of_education, gender FROM auth_userprofile WHERE user_id IN (" + ",".join(
                    ["%s"] * len(user_id_list)) + ")"
                query = query % tuple(user_id_list)
                course_cursor.execute(query)
                result = course_cursor.fetchall()
                for record in result:
                    user_id = int(record[0])
                    pc_dict[user_id].set_YoB(record[1])
                    pc_dict[user_id].set_LoE(record[2])
                    pc_dict[user_id].set_gender(record[3])

                # Set certified based on the data in {certificates_generatedcertificate}
                utils.log("{certificates_generatedcertificate}")
                query = "SELECT user_id, grade, status FROM certificates_generatedcertificate WHERE user_id IN (" + ",".join(
                    ["%s"] * len(user_id_list)) + ")"
                query = query % tuple(user_id_list)
                course_cursor.execute(query)
                result = course_cursor.fetchall()
                for record in result:
                    user_id = int(record[0])
                    pc_dict[user_id].set_grade(float(record[1]))
                    pc_dict[user_id].set_certified(record[2])

                # Set start_time based on the data in {student_courseenrollment}
                utils.log("{student_courseenrollment}")
                query = "SELECT user_id, created, mode FROM student_courseenrollment WHERE user_id IN (" + ",".join(
                    ["%s"] * len(user_id_list)) + ")"
                query = query % tuple(user_id_list)
                course_cursor.execute(query)
                result = course_cursor.fetchall()
                nhonor = 0
                naudit = 0
                nvertified = 0
                registration_open_date = datetime.date.today()
                for record in result:
                    user_id = int(record[0])
                    start_time = record[1]  # dateutil.parser.parse(record[1])
                    start_date = start_time.date()
                    pc_dict[user_id].set_start_time(start_date)
                    pc_dict[user_id].set_mode(record[2])
                    if record[2] == 'honor':
                        nhonor += 1
                    if record[2] == 'audit':
                        naudit += 1
                    if record[2] == 'verified':
                        nvertified += 1
                    if start_date < registration_open_date:
                        registration_open_date = start_date
                # Set cf_item nhonor_students, naudit_students, nvertified_students, registration_open_date
                cf_item.set_nhonor_students(nhonor)
                cf_item.set_naudit_students(naudit)
                cf_item.set_nvertified_students(nvertified)
                cf_item.set_registration_open_date(registration_open_date)

                # Set ndays_act and viewed based on the data in {courseware_studentmodule}
                try:
                    utils.log("{ndays_act: courseware_studentmodule}")
                    query = "SELECT student_id, COUNT(DISTINCT SUBSTRING(created, 1, 10)) FROM courseware_studentmodule WHERE student_id is not null GROUP BY student_id"
                    course_cursor.execute(query)
                    result = course_cursor.fetchall()
                    for record in result:
                        user_id = int(record[0])
                        if user_id in pc_dict:
                            pc_dict[user_id].set_ndays_act(record[1])
                            if record[1] > 0:
                                pc_dict[user_id].set_viewed(1)
                        else:
                            utils.log("Student id: %s does not exist in {auth_user}." % user_id)
                except self.sql_pc_conn.ProgrammingError:
                    utils.log("Couldnt find courseware_studentmodule for " + course_id)
                    continue
                except:
                    utils.log("Unexpected error:" + sys.exc_info()[0])

                # Set attempted problems
                utils.log("{attempted_problems: courseware_studentmodule}")
                query = "SELECT student_id, COUNT(state) FROM courseware_studentmodule WHERE state LIKE '%correct_map%' AND student_id IS NOT NULL GROUP BY student_id"
                course_cursor.execute(query)
                result = course_cursor.fetchall()
                for record in result:
                    user_id = int(record[0])
                    if user_id in pc_dict:
                        pc_dict[user_id].set_attempted_problems(record[1])
                    else:
                        utils.log("Student id: %s does not exist in {auth_user}." % user_id)

                # Set nplay_video based on the data in {courseware_studentmodule}
                utils.log("{nplay_video: courseware_studentmodule}")
                query = "SELECT student_id, COUNT(*) FROM courseware_studentmodule WHERE module_type = 'video' AND student_id IS NOT NULL GROUP BY student_id"
                course_cursor.execute(query)
                result = course_cursor.fetchall()
                for record in result:
                    user_id = int(record[0])
                    if user_id in pc_dict:
                        pc_dict[user_id].set_nplay_video(record[1])

                # Set nchapters and explored based on the data in {courseware_studentmodule}
                utils.log("{nchapters: courseware_studentmodule}")
                query = "SELECT student_id, COUNT(DISTINCT module_id) FROM courseware_studentmodule WHERE module_type = 'chapter' AND student_id IS NOT NULL GROUP BY student_id"
                course_cursor.execute(query)
                result = course_cursor.fetchall()
                for record in result:
                    user_id = int(record[0])
                    if user_id in pc_dict:
                        pc_dict[user_id].set_nchapters(record[1])
                        if record[1] >= half_chapters:
                            pc_dict[user_id].set_explored(1)
                        else:
                            pc_dict[user_id].set_explored(0)

                # Mongo
                # Discussion forum
                utils.log("{discussion_forum}")
                self.mongo_dbname = "discussion_forum"
                self.mongo_collectionname = course['discussiontable']
                self.connect_to_mongo(self.mongo_dbname, self.mongo_collectionname)
                # Change call for new API code Tim Cavanagh 05/01/2016
                # pipeline = [{"$group": {"_id": "$author_id", "postSum": {"$sum": 1}}}]
                #
                # user_posts = self.mongo_collection.aggregate(pipeline, allowDiskUse=True)
                user_posts = self.mongo_collection.aggregate([
                    # {"$match": {"author_id": {"$in": user_id_list}}},
                    {"$group": {"_id": "$author_id", "postSum": {"$sum": 1}}}
                ])  # ['result']
                
                for item in user_posts:
                    if "_id" in item and item["_id"] is not None:
                        user_id = int(item["_id"])
                        if user_id in pc_dict:
                            pc_dict[user_id].set_nforum_posts(item['postSum'])
                        else:
                            utils.log("Author id: %s does not exist in %s {auth_user}." % (
                                user_id, self.mongo_collectionname))
                    else:
                        utils.log(
                            "Author id: %s does not exist in %s {auth_user}." % (user_id, self.mongo_collectionname))

                # Tracking logs
                utils.log("{logs}")
                self.mongo_dbname = "logs"
                self.mongo_collectionname = "clickstream"
                self.connect_to_mongo(self.mongo_dbname, self.mongo_collectionname)

                user_events = self.mongo_collection.aggregate([
                    {"$match": {"context.course_id": pc_course_id}},
                    # {"$sort": {"time": 1}},
                    {"$group": {"_id": "$context.user_id", "countrySet": {"$addToSet": "$country"},
                                "eventSum": {"$sum": 1}, "last_event": {"$last": "$time"}}}
                ], allowDiskUse=True, batchSize=100, useCursor=False)  # ['result'] batchSize=100, , useCursor=False

                # if 'result' in user_events:
                #     user_events = user_events['result']
                for item in user_events:
                    try: 
                        user_id = item["_id"]
                        if user_id in pc_dict:
                            pc_dict[user_id].set_last_event(item["last_event"][:len(u"YYYY-MM-DD")])
                            pc_dict[user_id].set_nevents(item["eventSum"])
                            pc_dict[user_id].set_final_cc_cname(item["countrySet"])
                        else:
                            utils.log("Context.user_id: %s does not exist in %s {auth_user}." % (user_id, pc_course_id))
                    except TypeError as err:
                        print "error %s item %s" % (err.message, item) 

                # Set cf_item nregistered_students, nviewed_students, nexplored_students, ncertified_students
                nregistered_students = sum(pc_item.registered for pc_item in pc_dict.values())
                nviewed_students = sum(pc_item.viewed for pc_item in pc_dict.values())
                nexplored_students = sum(pc_item.explored for pc_item in pc_dict.values())
                ncertified_students = sum(pc_item.certified for pc_item in pc_dict.values())
                cf_item.set_nregistered_students(nregistered_students)
                cf_item.set_nviewed_students(nviewed_students)
                cf_item.set_nexplored_students(nexplored_students)
                cf_item.set_ncertified_students(ncertified_students)

                pc_cursor = self.sql_pc_conn.cursor()
                # print cf_item
                cf_item.save2db(pc_cursor, self.cf_table)

                # Till now, data preparation for pc_tablex has been finished.
                # Check consistent then write them into the database.
                utils.log("save to {personcourse}")
                tablename = self.pc_table + "_" + course_id
                for user_id, user_data in pc_dict.items():
                    pc_dict[user_id].set_inconsistent_flag()
                    pc_dict[user_id].save2db(pc_cursor, tablename)

                self.sql_pc_conn.commit()

            self.datadump2csv(course_items)
            self.save_run_ingest()
            utils.log("Person course completed")