def run_pipeline(ql=True, estimate_parameters=False, id_="simone",
                 file_name=None, skipping=None, plotting=True):
    """
    Run the complete processing pipeline on one data source.

    Loads (or converts) the data, builds a ``Saver`` and a ``Processor``
    around it, and then runs the processor's calculations group by group.
    Each group can be switched off through ``skipping``. Different sources
    need some methods to behave differently; the source is indicated with
    the ``id_`` tag.

    Parameters
    ----------
    ql: bool
        Whether the data should be reloaded from a preprocessed file
        (quick load). Needs to be set to False to load a new source file.
        Passed straight to ``load``; not used when ``id_`` is ``"ff"``.
    estimate_parameters: bool
        Whether the parameters for the curves should be estimated. Only
        the literal ``True`` enables the estimation.
    id_: str
        The identifier for the source. ``"ff"`` is read with ``convert``,
        every other value with ``load``. Also used as the plot sub-folder
        name and as the name handed to ``save``.
    file_name: str or None
        Path to the file. Defaults to
        ``"./res/simone_all_data_all_attempts.xlsx"`` when None.
    skipping: list of str or None
        A list of grouped options in the pipeline that will be skipped.
        Recognised values are ``"pre/post"``, ``"total exercises"``,
        ``"per skill"``, ``"curves"``, ``"curve_statistics"``,
        ``"per lesson"``, ``"effort"``, ``"logs"`` and ``"saving"``.
        None means nothing is skipped.
    plotting: bool
        Whether to plot the curves when processing them.

    Returns
    -------
    None
    """
    # Set up variables.
    phases = ["pre", "gui", "nap", "ap", "rap", "post"]
    if file_name is None:
        file_name = "./res/simone_all_data_all_attempts.xlsx"
    if skipping is None:
        skipping = []

    # Load and preprocess data.
    if id_ not in ["ff"]:
        data, first_att_data, transfer_data, log_data = load(
            ql, file_name, id_)
    else:
        data, first_att_data, transfer_data, log_data = convert(file_name)

    # NOTE(review): the skills found in the data are immediately replaced
    # by the hard-coded list below, for every source and not only "ff".
    # Confirm this is still intended before relying on data-driven skills.
    skills = data.LOID.unique()
    skills = [8209, 8216, 10071, 12402, 10488, 8220, 12520,
              8214]  # ff hack: no transfer skills

    # print(data[['UserId', 'ExerciseId', 'LOID', 'phase']].loc[
    #     data.UserId == 2369506].tail(60))

    # Inspect the data to determine whether preprocessing was performed
    # correctly.
    # inspect(data)
    # return None
    # print(transfer_data.LOID.head())

    print("Processing data")

    # Initiate creation of save file.
    saver = Saver(data)
    processor = Processor(data, first_att_data, saver.short, saver.long,
                          phases, log_data)

    if estimate_parameters is True:
        parameters = processor.estimate_parameters(skills, grain=100)
        print(parameters)

    # Start the processing of the different variables.
    # General stuff
    for skill in skills:
        processor.add_skill_to_long_file(skill)

    # Pre/post stuff
    if "pre/post" not in skipping:
        for phase in ["pre", "post"]:
            processor.count_total_correct_phase_exercises(phase)
        processor.calculate_gain()
        processor.get_transfer_score(transfer_data)

    # Total exercises stuff
    if "total exercises" not in skipping:
        processor.count_total_exercises_made()
        processor.count_total_exercises_correct()
        processor.count_total_exercises_made_att()
        processor.count_total_exercises_correct_att()

    # Per skill stuff
    # Every calculation is run for all skills before the next one starts.
    if "per skill" not in skipping:
        for phase in ["pre", "post"]:
            for skill in skills:
                processor.skill_count_total_correct_phase_exercise(
                    skill, phase)
        for skill in skills:
            processor.calculate_gain_per_skill(skill)
        for skill in skills:
            processor.get_last_ability_of_skill(skill)
        for skill in skills:
            processor.count_total_exercises_made_per_skill(skill)
        for skill in skills:
            processor.count_total_exercises_correct_per_skill(skill)
        for skill in skills:
            processor.calculate_percentage_correct_per_skill(skill)
        for skill in skills:
            processor.count_total_exercises_made_att_per_skill(skill)
        for skill in skills:
            processor.count_total_exercises_correct_att_per_skill(skill)
        for skill in skills:
            processor.calculate_percentage_correct_att_per_skill(skill)
        # for skill in skills:
        #     processor.count_total_adaptive_per_skill(skill)
        # for skill in skills:
        #     processor.count_correct_adaptive_per_skill(skill)
        # for skill in skills:
        #     processor.calculate_percentage_correct_adaptive_per_skill(skill)
        # for skill in skills:
        #     processor.count_total_adaptive_att_per_skill(skill)
        # for skill in skills:
        #     processor.count_correct_adaptive_att_per_skill(skill)
        # for skill in skills:
        #     processor.calculate_percentage_correct_adaptive_att_per_skill(
        #         skill)

    # Curve stuff
    if "curves" not in skipping:
        print("processing curve data")
        # # FOR TESTING ONLY
        # # for skill in skills:
        # #     processor.process_wrong_curves(skill,
        # #     method="exclude_single_strays", do_plot=True)
        # # FOR TESTING ONLY

        # makedirs also creates a missing "./plots" parent and does not
        # fail when the folder already exists, unlike exists() + mkdir().
        os.makedirs(f'./plots/{id_}', exist_ok=True)
        for skill in skills:
            processor.process_curves(skill, method="exclude_single_strays",
                                     do_plot=plotting, folder=id_,
                                     add_elo=True, add_ln=False)

    print(skipping)

    if "curve_statistics" not in skipping:
        print("processing statistics of curve data")
        for skill in skills:
            processor.calculate_type_curve(skill)
        for skill in skills:
            processor.get_phase_of_last_peak(skill)
        # NOTE(review): second, identical pass over get_phase_of_last_peak.
        # Kept as-is because its side effects are not visible from here;
        # it looks like a copy-paste duplicate - confirm and remove.
        for skill in skills:
            processor.get_phase_of_last_peak(skill)
        for phase in phases:
            for skill in skills:
                processor.count_first_attempts_per_skill(phase, skill)
        for skill in skills:
            processor.calculate_general_spikiness(skill)
        for skill in skills:
            processor.calculate_phase_spikiness(skill, phases)
        for skill in skills:
            processor.get_total_amount_of_peaks(skill)
        for phase in phases:
            for skill in skills:
                processor.get_peaks_per_skill_per_phase(skill, phase)
        for skill in skills:
            processor.get_total_amount_of_trans_peaks(skill)
        for phase in phases:
            for skill in skills:
                processor.get_trans_peaks_per_skill_per_phase(skill, phase)

    # Per lesson stuff
    # Each try-group is abandoned as a whole at the first
    # NotImplementedError; the following groups are still attempted.
    # NOTE(review): presumably raised for sources without lesson support;
    # the error is swallowed silently - confirm that is intended.
    if "per lesson" not in skipping:
        try:
            for skill in skills:
                processor.get_last_ability_first_lesson_of_skill(skill, id_)
            for skill in skills:
                processor.get_total_exercises_made_first_lesson(skill, id_)
            for skill in skills:
                processor.get_total_exercises_correct_first_lesson(
                    skill, id_)
            for skill in skills:
                processor.calculate_percentage_correct_first_lesson_total(
                    skill, id_)
        except NotImplementedError:
            pass

        try:
            for skill in skills:
                processor.get_unique_exercises_made_first_lesson(skill, id_)
            # for skill in skills:
            #     processor.get_unique_exercises_correct_first_lesson(skill,
            #                                                         id_)
            for skill in skills:
                processor.calculate_percentage_correct_first_lesson_unique(
                    skill, id_)
            for skill in skills:
                processor.get_total_exercises_made_second_lesson(skill, id_)
            for skill in skills:
                processor.get_total_exercises_correct_second_lesson(
                    skill, id_)
            for skill in skills:
                processor.calculate_percentage_correct_second_lesson_total(
                    skill, id_)
        except NotImplementedError:
            pass

        try:
            for skill in skills:
                processor.get_unique_exercises_made_second_lesson(skill, id_)
            for skill in skills:
                processor.get_unique_exercises_correct_second_lesson(
                    skill, id_)
            for skill in skills:
                processor.calculate_percentage_correct_second_lesson_unique(
                    skill, id_)
            for skill in skills:
                processor.detect_missing_skill_first_lesson(skill, id_)
            for skill in skills:
                processor.detect_missing_skill_repeat_lesson(skill, id_)
        except NotImplementedError:
            pass

    # Effort stuff
    # Only run for the sources listed here.
    if "effort" not in skipping:
        if id_ in ["kb", "kb_all", "ff"]:
            for skill in skills:
                processor.calculate_average_effort(skill, id_)
            for skill in skills:
                processor.calculate_total_effort(skill, id_)

    # for skill in skills:
    #     for moment in [1, 2, 3]:
    #         processor.get_setgoal(skill, moment)
    # for skill in skills:
    #     processor.get_shown_path_after_first_lesson(skill)
    #     processor.get_shown_path_after_repeat_lesson(skill)

    # Log stuff
    if "logs" not in skipping:
        for skill in skills:
            for moment in range(3):
                processor.get_setgoal(skill, moment)
        for skill in skills:
            processor.get_changed_difficulties(skill)
        for skill in skills:
            processor.get_changed_difficulties_down(skill)
        for skill in skills:
            processor.get_changed_difficulties_up(skill)

    # Goal versus reality; only runs when both the per-lesson and the log
    # groups were not skipped.
    if "per lesson" not in skipping and "logs" not in skipping:
        for skill in skills:
            processor.calculate_difference_first_goal_reality(skill)
        for skill in skills:
            processor.calculate_difference_repeat_goal_reality(skill)
        for skill in skills:
            processor.calculate_difference_end_goal_reality(skill)

    if "saving" not in skipping:
        save(saver, processor, f_name=id_)