from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import publication_type_output
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

# Publication type: selected value(s)
publicationtype = get_data(publication_type_output)
publicationtype_df = _as_column(publicationtype, "pub_type_raw")

# Publication type: highlighted text
publicationtype_HT = highlighted_text(publication_type_output)
publicationtype_HT_df = _as_column(publicationtype_HT, "pubtype_ht")

# Publication type: user comments
publicationtype_Comments = comments(publication_type_output)
publicationtype_Comments_df = _as_column(publicationtype_Comments, "pubtype_info")

# Place the three single-column frames side by side.
publication_type_df = pd.concat(
    [
        publicationtype_df,
        publicationtype_HT_df,
        publicationtype_Comments_df,
    ],
    axis=1,
    sort=False,
)
from Main import get_data, highlighted_text, comments from AttributeIDList import edu_setting_output import pandas as pd # get educational setting data edusetting = get_data(edu_setting_output) edusetting_df = pd.DataFrame(edusetting) edusetting_df = edusetting_df.T edusetting_df.columns=["int_setting_raw"] # binarize educational setting data """ edusetting_df["int_setting_primary/elementary_school"] = edusetting_df["int_setting_raw"].map(set(['Primary/elementary school']).issubset).astype(int) edusetting_df["int_setting_middle_school"] = edusetting_df["int_setting_raw"].map(set(['Middle school']).issubset).astype(int) edusetting_df["int_setting_secondary/high_school"] = edusetting_df["int_setting_raw"].map(set(['Secondary/High school']).issubset).astype(int) """ # Get Educational Setting highlighted text edusetting_HT = highlighted_text(edu_setting_output) edusetting_HT_df = pd.DataFrame(edusetting_HT) edusetting_HT_df = edusetting_HT_df.T edusetting_HT_df.columns = ["int_setting_ht"] # Get Educational Setting user comments edusetting_Comments = comments(edu_setting_output) edusetting_Comments_df = pd.DataFrame(edusetting_Comments) edusetting_Comments_df = edusetting_Comments_df.T edusetting_Comments_df.columns = ["int_setting_info"] # concatenate data frames educational_setting_df = pd.concat([ edusetting_df, edusetting_HT_df,
from Main import load_json, get_data, get_outcome_lvl2
from AttributeIDList import test_type_main, test_type_output
import pandas as pd

# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

# Test type (main extraction). Note that the imported attribute id
# `test_type_main` is rebound to the extracted data from here on.
test_type_main = get_data(test_type_main)
test_type_main_df = pd.DataFrame(test_type_main).T
test_type_main_df.columns = ["test_type_raw"]

# (indicator column, label looked for inside each "test_type_raw" cell)
# NOTE(review): the fourth column name says "normal" while its label says
# "National" -- looks like a typo, left unchanged so the output schema is stable.
_TEST_TYPE_INDICATORS = (
    ("test_type_standardised_test", "Standardised test"),
    ("test_type_researcher_developed_test", "Researcher developed test"),
    ("test_type_school_developed_test", "School-developed test"),
    ("test_type_normal_test_or_examination", "National test or examination"),
    ("test_type_international_tests", "International tests"),
)

# One 0/1 column per label: 1 when the label is contained in the raw cell.
for _column, _label in _TEST_TYPE_INDICATORS:
    test_type_main_df[_column] = (
        test_type_main_df["test_type_raw"].map({_label}.issubset).astype(int)
    )

# Test type per outcome
testtype_outcome = get_outcome_lvl2(test_type_output)
testtype_outcome_df = pd.DataFrame(testtype_outcome)

# Number the columns from 1 (their count depends on the number of outcomes).
testtype_outcome_df.columns = [
    f"out_test_type_raw_{column + 1}" for column in testtype_outcome_df.columns
]
# https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups
# Low-income economies, one name per element. Every element carries its own
# trailing comma: two adjacent string literals with no comma between them are
# silently joined into one string, which previously produced the bogus entries
# "TogoEthiopia" and "UgandaGambia, The".
# NOTE(review): "Congo, Dem. Rep" has no closing period, unlike the other
# abbreviated names in this list -- confirm against the extracted country labels.
low_income = [
    "Afghanistan",
    "Guinea-Bissau",
    "Sierra Leone",
    "Burkina Faso",
    "Haiti",
    "Somalia",
    "Burundi",
    "Korea, Dem. People's Rep.",
    "South Sudan",
    "Central African Republic",
    "Liberia",
    "Sudan",
    "Chad",
    "Madagascar",
    "Syrian Arab Republic",
    "Congo, Dem. Rep",
    "Malawi",
    "Tajikistan",
    "Eritrea",
    "Mali",
    "Togo",
    "Ethiopia",
    "Mozambique",
    "Uganda",
    "Gambia, The",
    "Niger",
    "Yemen, Rep.",
    "Guinea",
    "Rwanda",
]

##############################################

# get country data
country = get_data(countries)
country_df = pd.DataFrame(country)
country_df = country_df.T
country_df.columns = ["loc_country_raw"]

# get country highlighted text
country_HT = highlighted_text(countries)
country_HT_df = pd.DataFrame(country_HT)
country_HT_df = country_HT_df.T
country_HT_df.columns = ["loc_country_ht"]

# get country user comments
country_Comments = comments(countries)
country_Comments_df = pd.DataFrame(country_Comments)
country_Comments_df = country_Comments_df.T
country_Comments_df.columns = ["loc_country_info"]
from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import other_outcomes_output
from AttributeIDList import additional_outcomes_output
from AttributeIDList import other_participants_output
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

#################
# Other outcomes
#################

# Other outcomes: selected value(s)
other_outcomes = get_data(other_outcomes_output)
other_outcomes_df = _as_column(other_outcomes, "out_other_raw")

# Other outcomes: highlighted text
other_outcomes_HT = highlighted_text(other_outcomes_output)
other_outcomes_HT_df = _as_column(other_outcomes_HT, "out_other_ht")

# Other outcomes: user comments
other_outcomes_info = comments(other_outcomes_output)
other_outcomes_info_df = _as_column(other_outcomes_info, "out_other_info")
from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import randomisation_details
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

# Randomisation: selected value(s)
randomisation = get_data(randomisation_details)
randomisation_df = _as_column(randomisation, "rand_raw")

# Randomisation: highlighted text
# (this frame is named `randomisation_details_df`, not `*_HT_df`)
randomisation_HT = highlighted_text(randomisation_details)
randomisation_details_df = _as_column(randomisation_HT, "rand_ht")

# Randomisation: user comments
randomisation_Comments = comments(randomisation_details)
randomisation_Comments_df = _as_column(randomisation_Comments, "rand_info")

# Place the three frames side by side; `randomisation_df` is rebound from the
# raw-only frame to the combined one.
randomisation_df = pd.concat(
    [
        randomisation_df,
        randomisation_details_df,
        randomisation_Comments_df,
    ],
    axis=1,
    sort=False,
)
from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import treatment_group
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

# Treatment group: selected value(s)
treatmentgroup = get_data(treatment_group)
treatmentgroup_df = _as_column(treatmentgroup, "treat_group_raw")

# Treatment group: highlighted text
treatmentgroup_HT = highlighted_text(treatment_group)
treatmentgroup_HT_df = _as_column(treatmentgroup_HT, "treat_group_ht")

# Treatment group: user comments
treatmentgroup_Comments = comments(treatment_group)
treatmentgroup_Comments_df = _as_column(treatmentgroup_Comments, "treat_group_info")

# Place the three single-column frames side by side.
treatment_group_df = pd.concat(
    [
        treatmentgroup_df,
        treatmentgroup_HT_df,
        treatmentgroup_Comments_df,
    ],
    axis=1,
    sort=False,
)
from Main import load_json, get_data, comments, highlighted_text from AttributeIDList import intervention_implementation_details import pandas as pd # load json file load_json() # get intervention implementation detail data InterventionDetail = get_data(intervention_implementation_details) InterventionDetail_df = pd.DataFrame(InterventionDetail) InterventionDetail_df = InterventionDetail_df.T InterventionDetail_df.columns = ["int_fidel_raw"] # get intervention implementation detail highlighted text InterventionDetail_HT = highlighted_text(intervention_implementation_details) InterventionDetail_HT_df = pd.DataFrame(InterventionDetail_HT) InterventionDetail_HT_df = InterventionDetail_HT_df.T InterventionDetail_HT_df.columns = ["int_fidel_ht"] # get intervention implementation detail user comments InterventionDetail_Comments = comments(intervention_implementation_details) InterventionDetail_Comments_df = pd.DataFrame(InterventionDetail_Comments) InterventionDetail_Comments_df = InterventionDetail_Comments_df.T InterventionDetail_Comments_df.columns = ["int_fidel_info"] # concatenate data frames intervention_detail_df = pd.concat([ InterventionDetail_df, InterventionDetail_HT_df, InterventionDetail_Comments_df ], axis=1,
from Main import get_data, highlighted_text, comments
from AttributeIDList import comparability_output
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Comparability: selected value(s)
comparability = get_data(comparability_output)
comparability_df = _as_column(comparability, "comp_anal_raw")

# Comparability: highlighted text
comparability_HT = highlighted_text(comparability_output)
comparability_HT_df = _as_column(comparability_HT, "comp_anal_ht")

# Comparability: user comments
comparability_Comments = comments(comparability_output)
comparability_Comments_df = _as_column(comparability_Comments, "comp_anal_info")

# Place the three frames side by side; `comparability_df` is rebound from the
# raw-only frame to the combined one.
comparability_df = pd.concat(
    [
        comparability_df,
        comparability_HT_df,
        comparability_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Missing cells become the literal string "NA".
comparability_df.fillna("NA", inplace=True)
from Main import get_data, highlighted_text, comments
from AttributeIDList import student_gender
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Gender: selected value(s)
gender = get_data(student_gender)
gender_df = _as_column(gender, "part_gen_raw")

# Disabled 0/1 indicator columns (previously parked inside a string literal):
# gender_df["part_gen_female_only"] = gender_df["part_gen_raw"].map(set(['Female only']).issubset).astype(int)
# gender_df["part_gen_male_only"] = gender_df["part_gen_raw"].map(set(['Male only']).issubset).astype(int)
# gender_df["part_gen_mixed_gender"] = gender_df["part_gen_raw"].map(set(['Mixed gender']).issubset).astype(int)
# gender_df["part_gen_no_info_provided"] = gender_df["part_gen_raw"].map(set(['No information provided']).issubset).astype(int)

# Gender: highlighted text
gender_HT = highlighted_text(student_gender)
gender_HT_df = _as_column(gender_HT, "part_gen_ht")

# Gender: user comments
gender_Comments = comments(student_gender)
gender_Comments_df = _as_column(gender_Comments, "part_gen_info")

# Place the three frames side by side; `gender_df` is rebound from the
# raw-only frame to the combined one.
gender_df = pd.concat(
    [gender_df, gender_HT_df, gender_Comments_df],
    axis=1,
    sort=False,
)
further_ses_info_Comments_df = pd.DataFrame(further_ses_info_Comments) further_ses_info_Comments_df = further_ses_info_Comments_df.T further_ses_info_Comments_df.columns = ["fsm_info_info"] # get further low ses info highlighted text further_ses_fsm_info_HT = highlighted_text(further_ses_fsm_info_output) further_ses_fsm_info_HT_df = pd.DataFrame(further_ses_fsm_info_HT) further_ses_fsm_info_HT_df = further_ses_fsm_info_HT_df.T further_ses_fsm_info_HT_df.columns = ["fsm_info_ht"] ####################################### # NO LOW SES/FSM INFORMATION PROVIDED # ####################################### # get now low ses info data no_low_ses_fsm_info = get_data(no_ses_fsm_info_provided_output) no_low_ses_fsm_info_df = pd.DataFrame(no_low_ses_fsm_info) no_low_ses_fsm_info_df = no_low_ses_fsm_info_df.T no_low_ses_fsm_info_df.columns = ["fsm_na_raw"] # get no low ses info comments no_low_ses_fsm_info_comments = comments(no_ses_fsm_info_provided_output) no_low_ses_fsm_info_comments_df = pd.DataFrame(no_low_ses_fsm_info_comments) no_low_ses_fsm_info_comments_df = no_low_ses_fsm_info_comments_df.T no_low_ses_fsm_info_comments_df.columns = ["fsm_na_info"] """ no_low_ses_fsm_info_df["No_SES_FSM_Info"]=no_low_ses_fsm_info_df["No_SES_FSM_Info_Provided"].map(set(['No SES/FSM Information Provided']).issubset).astype(int) """ # concatenate datafeames ses_fsm_df = pd.concat([ low_ses_proportion_Comments_df, low_ses_proportion_HT_df, low_ses_percentage_Comments_df, low_ses_percentage_HT_df,
from Main import load_json, get_data, comments, highlighted_text
from CODES import study_realism_output
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

# Study realism: selected value(s)
studyrealism = get_data(study_realism_output)
studyrealism_df = _as_column(studyrealism, "eco_valid_raw")

# Study realism: highlighted text
studyrealism_HT = highlighted_text(study_realism_output)
studyrealism_HT_df = _as_column(studyrealism_HT, "eco_valid_ht")

# Study realism: user comments
studyrealism_Comments = comments(study_realism_output)
studyrealism_Comments_df = _as_column(studyrealism_Comments, "eco_valid_info")

# Place the three single-column frames side by side.
study_realism_df = pd.concat(
    [
        studyrealism_df,
        studyrealism_HT_df,
        studyrealism_Comments_df,
    ],
    axis=1,
    sort=False,
)
from Main import get_data, comments
from AttributeIDList import admin_strand_output, admin_strand_secondary
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Admin strand: selected value(s)
admin_strand = get_data(admin_strand_output)
adminstrand_df = _as_column(admin_strand, "strand_raw")

# Admin strand update data (if it exists) [temp].
# This frame is built but is not part of the concatenation below.
admin_strand_other = get_data(admin_strand_secondary)
adminstrand_secondary_df = _as_column(admin_strand_other, "SGT_Update_2020")

# Admin strand: user comments
admin_strand_comments = comments(admin_strand_output)
admin_strand_comments_df = _as_column(admin_strand_comments, "strand_info")

# Raw value and comments side by side.
admin_strand_df = pd.concat(
    [adminstrand_df, admin_strand_comments_df],
    axis=1,
    sort=False,
)

# Swap carriage returns, newlines and colons for a single space, one pattern
# at a time and in the same order as before.
for _pattern in ('\r', '\n', ':'):
    admin_strand_df.replace(_pattern, ' ', regex=True, inplace=True)
from Main import get_data, highlighted_text, comments
from AttributeIDList import comparabiltiy_vars_reported
from AttributeIDList import if_yes_which_comparability_variables_reported_output
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


#####################################################
# Are the variables used for comparability reported?
#####################################################

# Comparability variables reported: selected value(s)
comparability_vars_reported = get_data(comparabiltiy_vars_reported)
comparability_vars_reported_df = _as_column(
    comparability_vars_reported, "comp_var_rep_raw"
)

# Comparability variables reported: highlighted text
comparability_vars_reported_HT = highlighted_text(comparabiltiy_vars_reported)
comparability_vars_reported_HT_df = _as_column(
    comparability_vars_reported_HT, "comp_var_rep_ht"
)

# Comparability variables reported: user comments
comparability_vars_reported_Comments = comments(comparabiltiy_vars_reported)
comparability_vars_reported_Comments_df = _as_column(
    comparability_vars_reported_Comments, "comp_var_rep_info"
)

######################################################
# If yes, which variables are used for comparability?
from Main import get_data, highlighted_text, comments
from AttributeIDList import curriculum_subjects
from AttributeIDList import other_outcomes_output
from AttributeIDList import which_other_outcomes_output
from AttributeIDList import other_participants_output
import pandas as pd

# Curriculum subjects: selected value(s)
curriculumsubjects = get_data(curriculum_subjects)
curriculumsubjects_df = pd.DataFrame(curriculumsubjects).T
curriculumsubjects_df.columns = ["test_subject_raw"]

# Disabled binarisation (previously parked inside a string literal): one 0/1
# "test_subject_<subject>" column per label, each computed as
#   curriculumsubjects_df["test_subject_raw"].map(set([label]).issubset).astype(int)
# for the labels
#   'Literacy (first language)', 'Reading comprehension', 'Decoding/phonics',
#   'Spelling', 'Reading other', 'Speaking and listening/Oral language',
#   'Writing', 'Mathematics', 'Science', 'Social studies', 'Arts',
#   'Languages', 'Other curriculum test'

# Curriculum subjects: highlighted text
curriculumsubjects_HT = highlighted_text(curriculum_subjects)
curriculumsubjects_HT_df = pd.DataFrame(curriculumsubjects_HT)
from Main import load_json, get_data, comments, highlighted_text
from CODES import intervention_evaluation
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

# Intervention evaluation: selected value(s)
InterventionEvaluation = get_data(intervention_evaluation)
InterventionEvaluation_df = _as_column(InterventionEvaluation, "out_eval_raw")

# "Yes" when the raw cell contains the EEF-evaluation label, otherwise "No"
# (computed as a 0/1 flag first, then relabelled).
InterventionEvaluation_df["eef_eval_raw"] = (
    InterventionEvaluation_df["out_eval_raw"]
    .map({"Is this an EEF evaluation?"}.issubset)
    .astype(int)
    .replace(to_replace=[0, 1], value=["No", "Yes"])
)

# Intervention evaluation: highlighted text
InterventionEvaluation_HT = highlighted_text(intervention_evaluation)
InterventionEvaluation_HT_df = _as_column(InterventionEvaluation_HT, "out_eval_ht")

# Intervention evaluation: user comments
InterventionEvaluation_Comments = comments(intervention_evaluation)
InterventionEvaluation_Comments_df = _as_column(
    InterventionEvaluation_Comments, "out_eval_info"
)
number_of_classes_total_Comments) number_of_classes_total_Comments_df = number_of_classes_total_Comments_df.T number_of_classes_total_Comments_df.columns = ["class_total_info"] # get number of classes total highlighted text data number_of_classes_total_HT = highlighted_text(number_of_classes_total_output) number_of_classes_total_HT_df = pd.DataFrame(number_of_classes_total_HT) number_of_classes_total_HT_df = number_of_classes_total_HT_df.T number_of_classes_total_HT_df.columns = ["class_total_ht"] ######################################################### # NUMBER OF CLASSES NOT PROVIDED/UNCLEAR/NOT APPLICABLE # ######################################################### # get number of classes not provided data number_of_classes_np = get_data(number_of_classes_not_provided_output) number_of_classes_np_df = pd.DataFrame(number_of_classes_np) number_of_classes_np_df = number_of_classes_np_df.T number_of_classes_np_df.columns = ["class_na_raw"] # get number of classes not provided comments data number_of_classes_not_provided_Comments = comments( number_of_classes_not_provided_output) number_of_classes_not_provided_Comments_df = pd.DataFrame( number_of_classes_not_provided_Comments) number_of_classes_not_provided_Comments_df = number_of_classes_not_provided_Comments_df.T number_of_classes_not_provided_Comments_df.columns = ["class_na_info"] # get number of classes not provided highlighted text data number_of_classes_not_provided_HT = highlighted_text( number_of_classes_not_provided_output)
from Main import get_data, highlighted_text, comments
from AttributeIDList import student_age_output
import pandas as pd

# Student age: selected value(s)
student_age = get_data(student_age_output)
student_age_df = pd.DataFrame(student_age).T
student_age_df.columns = ["part_age_raw"]

# Disabled binarisation (previously parked inside a string literal): one 0/1
# column per age, each computed as
#   student_age_df["part_age_raw"].map(set([label]).issubset).astype(int)
# - "part_age_3" ... "part_age_18" for the labels '3' ... '18'
# - "part_age_no_information_provided" for 'No information provided'

# Student age: highlighted text
student_age_HT = highlighted_text(student_age_output)
student_age_HT_df = pd.DataFrame(student_age_HT)
# Control group other information: highlighted text
# (`ControlGroupOtherInfo_HT` is produced earlier in the file)
ControlGroupOtherInfo_HT_df = pd.DataFrame(ControlGroupOtherInfo_HT).T
ControlGroupOtherInfo_HT_df.columns = ["out_c2_other_ht"]

# Control group other information: user comments
ControlGroupOtherInfo_comments = comments(control_group_two_any_other_info)
ControlGroupOtherInfo_comments_df = pd.DataFrame(ControlGroupOtherInfo_comments).T
ControlGroupOtherInfo_comments_df.columns = ["out_c2_other_info"]

########################
# Follow up data?
########################

# Follow-up data: selected value(s)
followupdata = get_data(follow_up_data_reported)
followupdata_df = pd.DataFrame(followupdata).T
followupdata_df.columns = ["follow_up_raw"]

# Follow-up data: highlighted text
followupdata_HT = highlighted_text(follow_up_data_reported)
followupdata_HT_df = pd.DataFrame(followupdata_HT).T
followupdata_HT_df.columns = ["follow_up_ht"]

# Follow-up data: user comments
followupdata_comments = comments(follow_up_data_reported)
followupdata_comments_df = pd.DataFrame(followupdata_comments).T
followupdata_comments_df.columns = ["follow_up_info"]
number_of_schools_total_Comments) number_of_schools_total_Comments_df = number_of_schools_total_Comments_df.T number_of_schools_total_Comments_df.columns = ["school_total_info"] # get total number of schools highlighted text data number_of_schools_total_HT = highlighted_text(number_of_schools_total_output) number_of_schools_total_HT_df = pd.DataFrame(number_of_schools_total_HT) number_of_schools_total_HT_df = number_of_schools_total_HT_df.T number_of_schools_total_HT_df.columns = ["school_total_ht"] ######################################################### # NUMBER OF SCHOOLS NOT PROVIDED/UNCLEAR/NOT APPLICABLE # ######################################################### # get number of schools not provided data number_of_schools_np = get_data(number_of_schools_not_provided_output) number_of_schools_np_df = pd.DataFrame(number_of_schools_np) number_of_schools_np_df = number_of_schools_np_df.T number_of_schools_np_df.columns = ["school_na_raw"] # get number of schools not provided comments data number_of_schools_np_Comments = comments(number_of_schools_not_provided_output) number_of_schools_np_Comments_df = pd.DataFrame(number_of_schools_np_Comments) number_of_schools_np_Comments_df = number_of_schools_np_Comments_df.T number_of_schools_np_Comments_df.columns = ["school_na_info"] # get number of schools not provided highlighted text data number_of_schools_np_HT = highlighted_text( number_of_schools_not_provided_output) number_of_schools_np_HT_df = pd.DataFrame(number_of_schools_np_HT) number_of_schools_np_HT_df = number_of_schools_np_HT_df.T
from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_approach_digital_technology
from AttributeIDList import intervention_approach_parents_or_community_volunteers
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

###########################################
# DIGITAL TECHNOLOGY INTERVENTION INCLUSION
###########################################

# Digital technology (inclusion): selected value(s)
DigitalTechnology = get_data(intervention_approach_digital_technology)
DigitalTechnology_df = _as_column(DigitalTechnology, "digit_tech_raw")

# Digital technology (inclusion): highlighted text
DigitalTechnology_HT = highlighted_text(intervention_approach_digital_technology)
DigitalTechnology_HT_df = _as_column(DigitalTechnology_HT, "digit_tech_ht")

# Digital technology (inclusion): user comments
DigitalTechnology_Comments = comments(intervention_approach_digital_technology)
DigitalTechnology_Comments_df = _as_column(
    DigitalTechnology_Comments, "digit_tech_info"
)
from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_delivery_output
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

# Intervention delivery: selected value(s)
InterventionDelivery = get_data(intervention_delivery_output)
interventiondelivery_df = _as_column(InterventionDelivery, "int_who_raw")

# Intervention delivery: highlighted text
InterventionDelivery_HT = highlighted_text(intervention_delivery_output)
InterventionDelivery_HT_df = _as_column(InterventionDelivery_HT, "int_who_ht")

# Intervention delivery: user comments
InterventionDelivery_Comments = comments(intervention_delivery_output)
InterventionDelivery_Comments_df = _as_column(
    InterventionDelivery_Comments, "int_who_info"
)

# Place the three single-column frames side by side.
intervention_delivery_df = pd.concat(
    [
        interventiondelivery_df,
        InterventionDelivery_HT_df,
        InterventionDelivery_Comments_df,
    ],
    axis=1,
    sort=False,
)
from Main import get_data, highlighted_text, comments
from AttributeIDList import baseline_differences_output
import pandas as pd


def _as_column(records, name):
    """Return *records* as a transposed DataFrame whose only column is *name*.

    The rename only succeeds when the transposed frame has exactly one column.
    """
    frame = pd.DataFrame(records).T
    frame.columns = [name]
    return frame


# Baseline differences: selected value(s)
baselinedifferences = get_data(baseline_differences_output)
baselinedifferences_df = _as_column(baselinedifferences, "base_diff_raw")

# Baseline differences: highlighted text
baselinedifferences_HT = highlighted_text(baseline_differences_output)
baselinedifferences_HT_df = _as_column(baselinedifferences_HT, "base_diff_ht")

# Baseline differences: user comments
baselinedifferences_Comments = comments(baseline_differences_output)
baselinedifferences_Comments_df = _as_column(
    baselinedifferences_Comments, "base_diff_info"
)

# Place the three single-column frames side by side.
baseline_differences_df = pd.concat(
    [
        baselinedifferences_df,
        baselinedifferences_HT_df,
        baselinedifferences_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Missing cells become the literal string "NA".
baseline_differences_df.fillna("NA", inplace=True)
from AttributeIDList import sample_output
import pandas as pd

# NOTE(review): load_json, get_outcome_lvl2 and get_data are used below but no
# import for them is visible in this fragment -- confirm it exists above.

# Load the JSON file (kept ahead of the extraction calls, as originally ordered).
load_json()

# Sample per outcome
sample = get_outcome_lvl2(sample_output)
sample_df = pd.DataFrame(sample)

# Number the columns from 1 (their count depends on the number of outcomes).
sample_df.columns = [f"out_samp_{column + 1}" for column in sample_df.columns]

# Sample "main check" value(s)
sample_main_check = get_data(sample_output)
sample_main_check_df = pd.DataFrame(sample_main_check).T
sample_main_check_df.columns = ["main_check"]

# Per-outcome columns followed by the main-check column.
all_variables = pd.concat(
    [sample_df, sample_main_check_df],
    axis=1,
    sort=False,
)

# Missing cells become the literal string "NA".
all_variables.fillna("NA", inplace=True)

# Saving to disk is disabled (previously parked inside a string literal):
# all_variables.to_csv("Sample.csv", index=False)
from Main import load_json, get_data, comments, highlighted_text from AttributeIDList import intervention_costs_reported import pandas as pd # load json file load_json() # Get Intervention Costs Reported main data InterventionCosts = get_data(intervention_costs_reported) InterventionCosts_df = pd.DataFrame(InterventionCosts) InterventionCosts_df = InterventionCosts_df.T InterventionCosts_df.columns = ["int_cost_raw"] # Get Intervention Costs Reported highlighted text InterventionCosts_HT = highlighted_text(intervention_costs_reported) InterventionCosts_HT_df = pd.DataFrame(InterventionCosts_HT) InterventionCosts_HT_df = InterventionCosts_HT_df.T InterventionCosts_HT_df.columns = ["int_cost_ht"] # Get Intervention Costs Reported user comments InterventionCosts_Comments = comments(intervention_costs_reported) InterventionCosts_Comments_df = pd.DataFrame(InterventionCosts_Comments) InterventionCosts_Comments_df = InterventionCosts_Comments_df.T InterventionCosts_Comments_df.columns = ["int_cost_info"] # concatenate data frames intervention_costs_df = pd.concat([ InterventionCosts_df, InterventionCosts_HT_df, InterventionCosts_Comments_df ], axis=1,
# Intervention time extraction.
# Collects the coded values, the highlighted text and the user comments
# recorded for the intervention-time attribute and lines them up side by
# side in `intervention_time_df`.
import pandas as pd

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_time_output

# load json file
load_json()

#####################
# INTERVENTION TIME #
#####################

# Coded intervention-time values, transposed into a single labelled column.
InterventionTime = get_data(intervention_time_output)
InterventionTime_df = pd.DataFrame(InterventionTime).T
InterventionTime_df.columns = ["int_when_raw"]

# Highlighted text attached to the intervention-time attribute.
InterventionTime_HT = highlighted_text(intervention_time_output)
InterventionTime_HT_df = pd.DataFrame(InterventionTime_HT).T
InterventionTime_HT_df.columns = ["int_when_ht"]

# User comments attached to the intervention-time attribute.
InterventionTime_Comments = comments(intervention_time_output)
InterventionTime_Comments_df = pd.DataFrame(InterventionTime_Comments).T
InterventionTime_Comments_df.columns = ["int_when_info"]

# Join the three single-column frames column-wise.
# NOTE(review): the original call was cut off right after `pd.concat([`.
# The frame list and the axis=1 / sort=False arguments follow the pattern of
# the other extraction scripts in this file - confirm against the intended
# script, including whether any further step followed.
intervention_time_df = pd.concat(
    [
        InterventionTime_df,
        InterventionTime_HT_df,
        InterventionTime_Comments_df,
    ],
    axis=1,
    sort=False,
)
# Intervention teaching approach extraction.
# Collects the coded values, the highlighted text and the user comments
# recorded for the intervention-teaching-approach attribute and lines them
# up side by side in `intervention_teaching_approach_df`.
import pandas as pd

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_teaching_approach

# load json file
load_json()

# Coded teaching-approach values, transposed into a single labelled column.
InterventionTeachingApproach = get_data(intervention_teaching_approach)
InterventionTeachingApproach_df = pd.DataFrame(InterventionTeachingApproach).T
InterventionTeachingApproach_df.columns = ["int_approach_raw"]

# Highlighted text attached to the teaching-approach attribute.
InterventionTeachingApproach_HT = highlighted_text(
    intervention_teaching_approach
)
InterventionTeachingApproach_HT_df = pd.DataFrame(
    InterventionTeachingApproach_HT
).T
InterventionTeachingApproach_HT_df.columns = ["int_approach_ht"]

# User comments attached to the teaching-approach attribute.
InterventionTeachingApproach_Comments = comments(
    intervention_teaching_approach
)
InterventionTeachingApproach_Comments_df = pd.DataFrame(
    InterventionTeachingApproach_Comments
).T
InterventionTeachingApproach_Comments_df.columns = ["int_approach_info"]

# Join the three single-column frames column-wise.
intervention_teaching_approach_df = pd.concat(
    [
        InterventionTeachingApproach_df,
        InterventionTeachingApproach_HT_df,
        InterventionTeachingApproach_Comments_df,
    ],
    axis=1,
    sort=False,
)
# Clustering extraction.
# Collects the coded values, the highlighted text and the user comments
# recorded for the clustering attribute. `clustering_df` first holds the
# coded values only and is then rebound to the combined three-column frame.
import pandas as pd

from Main import get_data, highlighted_text, comments
from AttributeIDList import clustering_output

# Coded clustering values, transposed into a single labelled column.
clustering = get_data(clustering_output)
clustering_df = pd.DataFrame(clustering).T
clustering_df.columns = ["clust_anal_raw"]

# Highlighted text attached to the clustering attribute.
clustering_HT = highlighted_text(clustering_output)
clustering_HT_df = pd.DataFrame(clustering_HT).T
clustering_HT_df.columns = ["clust_anal_ht"]

# User comments attached to the clustering attribute.
clustering_Comments = comments(clustering_output)
clustering_Comments_df = pd.DataFrame(clustering_Comments).T
clustering_Comments_df.columns = ["clust_anal_info"]

# Join the three single-column frames column-wise; from here on the name
# `clustering_df` refers to the combined frame, not the raw column alone.
clustering_df = pd.concat(
    [
        clustering_df,
        clustering_HT_df,
        clustering_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Replace every missing cell with the literal string "NA".
clustering_df = clustering_df.fillna("NA")
# Intervention organisation type extraction.
# Collects the coded values, the highlighted text and the user comments
# recorded for the intervention-organisation-type attribute and lines them
# up side by side in `intervention_org_type`.
import pandas as pd

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_organisation_type_output

# load json file
load_json()

# Coded organisation-type values, transposed into a single labelled column.
InterventionOrgType = get_data(intervention_organisation_type_output)
InterventionOrgType_df = pd.DataFrame(InterventionOrgType).T
InterventionOrgType_df.columns = ["int_prov_raw"]

# Highlighted text attached to the organisation-type attribute.
InterventionOrgType_HT = highlighted_text(
    intervention_organisation_type_output
)
InterventionOrgType_HT_df = pd.DataFrame(InterventionOrgType_HT).T
InterventionOrgType_HT_df.columns = ["int_prov_ht"]

# User comments attached to the organisation-type attribute.
InterventionOrgType_Comments = comments(intervention_organisation_type_output)
InterventionOrgType_Comments_df = pd.DataFrame(InterventionOrgType_Comments).T
InterventionOrgType_Comments_df.columns = ["int_prov_info"]

# Join the three single-column frames column-wise.
# NOTE(review): the original call was cut off after the frame list and never
# closed; it is completed with axis=1 / sort=False to match the other
# extraction scripts in this file. Any step that followed it is unknown -
# confirm.
intervention_org_type = pd.concat(
    [
        InterventionOrgType_df,
        InterventionOrgType_HT_df,
        InterventionOrgType_Comments_df,
    ],
    axis=1,
    sort=False,
)
# Attrition extraction.
# Builds single-column frames for the coded values, the highlighted text and
# the user comments of the attrition/drop-out-reported attribute.
import pandas as pd

from Main import get_data, comments, highlighted_text
# treatment_group_attrition and overall_percent_attrition are imported here
# but not used in the statements below.
from AttributeIDList import (
    attrition_dropout_reported_output,
    treatment_group_attrition,
    overall_percent_attrition,
)

###############################
# ATTRITION DROP OUT REPORTED #
###############################

# Coded attrition/drop-out values, transposed into a single labelled column.
attrition_dropout_reported = get_data(attrition_dropout_reported_output)
attrition_dropout_reported_df = pd.DataFrame(attrition_dropout_reported).T
attrition_dropout_reported_df.columns = ["attri_raw"]

# Highlighted text attached to the attrition/drop-out attribute.
attrition_dropout_reported_HT = highlighted_text(
    attrition_dropout_reported_output
)
attrition_dropout_reported_HT_df = pd.DataFrame(
    attrition_dropout_reported_HT
).T
attrition_dropout_reported_HT_df.columns = ["attri_ht"]

# User comments attached to the attrition/drop-out attribute.
attrition_dropout_reported_Comments = comments(
    attrition_dropout_reported_output
)
attrition_dropout_reported_Comments_df = pd.DataFrame(
    attrition_dropout_reported_Comments
).T
attrition_dropout_reported_Comments_df.columns = ["attri_info"]