Python get_data示例，Main.get_data Python示例

示例#1

0

显示文件

文件： PublicationType.py 项目： JonathanReardon/ToolkitExtraction

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import publication_type_output
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# Publication type: raw extraction values, transposed and named.
publicationtype = get_data(publication_type_output)
publicationtype_df = pd.DataFrame(publicationtype).T
publicationtype_df.columns = ["pub_type_raw"]

# Publication type: highlighted text.
publicationtype_HT = highlighted_text(publication_type_output)
publicationtype_HT_df = pd.DataFrame(publicationtype_HT).T
publicationtype_HT_df.columns = ["pubtype_ht"]

# Publication type: user comments.
publicationtype_Comments = comments(publication_type_output)
publicationtype_Comments_df = pd.DataFrame(publicationtype_Comments).T
publicationtype_Comments_df.columns = ["pubtype_info"]

# Place the three frames side by side (column-wise concatenation).
publication_type_df = pd.concat(
    [publicationtype_df, publicationtype_HT_df, publicationtype_Comments_df],
    sort=False,
    axis=1)

示例#2

0

显示文件

from Main import get_data, highlighted_text, comments
from AttributeIDList import edu_setting_output
import pandas as pd

# Educational setting: raw extraction values, transposed and named.
edusetting = get_data(edu_setting_output)
edusetting_df = pd.DataFrame(edusetting).T
edusetting_df.columns = ["int_setting_raw"]

# binarize educational setting data
""" edusetting_df["int_setting_primary/elementary_school"] = edusetting_df["int_setting_raw"].map(set(['Primary/elementary school']).issubset).astype(int)
edusetting_df["int_setting_middle_school"] = edusetting_df["int_setting_raw"].map(set(['Middle school']).issubset).astype(int)
edusetting_df["int_setting_secondary/high_school"] = edusetting_df["int_setting_raw"].map(set(['Secondary/High school']).issubset).astype(int) """

# Educational setting: highlighted text.
edusetting_HT = highlighted_text(edu_setting_output)
edusetting_HT_df = pd.DataFrame(edusetting_HT).T
edusetting_HT_df.columns = ["int_setting_ht"]

# Educational setting: user comments.
edusetting_Comments = comments(edu_setting_output)
edusetting_Comments_df = pd.DataFrame(edusetting_Comments).T
edusetting_Comments_df.columns = ["int_setting_info"]

# concatenate data frames
educational_setting_df = pd.concat([
    edusetting_df, 
    edusetting_HT_df,

示例#3

0

显示文件

文件： TestType.py 项目： JonathanReardon/ToolkitExtraction

from Main import load_json, get_data, get_outcome_lvl2
from AttributeIDList import test_type_main, test_type_output
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# Test type (main extraction): raw values, transposed and named.
# NOTE: this rebinds the imported name test_type_main to the extracted data.
test_type_main = get_data(test_type_main)
test_type_main_df = pd.DataFrame(test_type_main).T
test_type_main_df.columns = ["test_type_raw"]

# One 0/1 indicator column per option: 1 when the option label is contained
# in the row's raw value (assumes each raw value is a collection of labels
# -- TODO confirm).
# NOTE(review): "test_type_normal_test_or_examination" is paired with the
# label 'National test or examination'; "normal" looks like a typo for
# "national". Confirm with downstream consumers before renaming the column.
for _indicator, _label in (
        ("test_type_standardised_test", 'Standardised test'),
        ("test_type_researcher_developed_test", 'Researcher developed test'),
        ("test_type_school_developed_test", 'School-developed test'),
        ("test_type_normal_test_or_examination",
         'National test or examination'),
        ("test_type_international_tests", 'International tests')):
    test_type_main_df[_indicator] = test_type_main_df["test_type_raw"].map(
        {_label}.issubset).astype(int)

# Test type at outcome level (not transposed, unlike the main data above).
testtype_outcome = get_outcome_lvl2(test_type_output)
testtype_outcome_df = pd.DataFrame(testtype_outcome)

# Rename every column to "out_test_type_raw_<label + 1>".
testtype_outcome_df.columns = [
    "out_test_type_raw_{}".format(column + 1)
    for column in testtype_outcome_df.columns
]

示例#4

0

显示文件

# https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups

# Country names treated as low income (see the World Bank URL above).
# Every entry must be followed by a comma: adjacent string literals are
# silently concatenated by Python, which previously merged "Togo"/"Ethiopia"
# and "Uganda"/"Gambia, The" into single bogus entries.
# NOTE(review): "Congo, Dem. Rep" has no trailing period, unlike
# "Yemen, Rep." -- confirm it matches the spelling used in the country data.
low_income = [
    "Afghanistan", "Guinea-Bissau", "Sierra Leone", "Burkina Faso", "Haiti",
    "Somalia", "Burundi", "Korea, Dem. People's Rep.", "South Sudan",
    "Central African Republic", "Liberia", "Sudan", "Chad", "Madagascar",
    "Syrian Arab Republic", "Congo, Dem. Rep", "Malawi", "Tajikistan",
    "Eritrea", "Mali", "Togo",
    "Ethiopia", "Mozambique", "Uganda",
    "Gambia, The", "Niger", "Yemen, Rep.", "Guinea", "Rwanda",
]

##############################################

# Country: raw extraction values, transposed and named.
country = get_data(countries)
country_df = pd.DataFrame(country).T
country_df.columns = ["loc_country_raw"]

# Country: highlighted text.
country_HT = highlighted_text(countries)
country_HT_df = pd.DataFrame(country_HT).T
country_HT_df.columns = ["loc_country_ht"]

# Country: user comments.
country_Comments = comments(countries)
country_Comments_df = pd.DataFrame(country_Comments).T
country_Comments_df.columns = ["loc_country_info"]

示例#5

0

显示文件

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import other_outcomes_output
from AttributeIDList import additional_outcomes_output
from AttributeIDList import other_participants_output
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# ---------------
# Other outcomes
# ---------------

# Other outcomes: raw extraction values, transposed and named.
other_outcomes = get_data(other_outcomes_output)
other_outcomes_df = pd.DataFrame(other_outcomes).T
other_outcomes_df.columns = ["out_other_raw"]

# Other outcomes: highlighted text.
other_outcomes_HT = highlighted_text(other_outcomes_output)
other_outcomes_HT_df = pd.DataFrame(other_outcomes_HT).T
other_outcomes_HT_df.columns = ["out_other_ht"]

# Other outcomes: user comments.
other_outcomes_info = comments(other_outcomes_output)
other_outcomes_info_df = pd.DataFrame(other_outcomes_info).T
other_outcomes_info_df.columns = ["out_other_info"]

示例#6

0

显示文件

文件： Randomisation.py 项目： JonathanReardon/ToolkitExtraction

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import randomisation_details
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# Randomisation: raw extraction values, transposed and named.
randomisation = get_data(randomisation_details)
randomisation_df = pd.DataFrame(randomisation).T
randomisation_df.columns = ["rand_raw"]

# Randomisation: highlighted text (held in randomisation_details_df).
randomisation_HT = highlighted_text(randomisation_details)
randomisation_details_df = pd.DataFrame(randomisation_HT).T
randomisation_details_df.columns = ["rand_ht"]

# Randomisation: user comments.
randomisation_Comments = comments(randomisation_details)
randomisation_Comments_df = pd.DataFrame(randomisation_Comments).T
randomisation_Comments_df.columns = ["rand_info"]

# Rebind randomisation_df to the column-wise concatenation of all three.
randomisation_df = pd.concat(
    [randomisation_df, randomisation_details_df, randomisation_Comments_df],
    sort=False,
    axis=1)

示例#7

0

显示文件

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import treatment_group
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# Treatment group: raw extraction values, transposed and named.
treatmentgroup = get_data(treatment_group)
treatmentgroup_df = pd.DataFrame(treatmentgroup).T
treatmentgroup_df.columns = ["treat_group_raw"]

# Treatment group: highlighted text.
treatmentgroup_HT = highlighted_text(treatment_group)
treatmentgroup_HT_df = pd.DataFrame(treatmentgroup_HT).T
treatmentgroup_HT_df.columns = ["treat_group_ht"]

# Treatment group: user comments.
treatmentgroup_Comments = comments(treatment_group)
treatmentgroup_Comments_df = pd.DataFrame(treatmentgroup_Comments).T
treatmentgroup_Comments_df.columns = ["treat_group_info"]

# Place the three frames side by side (column-wise concatenation).
treatment_group_df = pd.concat(
    [treatmentgroup_df, treatmentgroup_HT_df, treatmentgroup_Comments_df],
    sort=False,
    axis=1)

示例#8

0

显示文件

文件： InterventionDetail.py 项目： JonathanReardon/ToolkitExtraction

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_implementation_details
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# Intervention implementation detail: raw values, transposed and named.
InterventionDetail = get_data(intervention_implementation_details)
InterventionDetail_df = pd.DataFrame(InterventionDetail).T
InterventionDetail_df.columns = ["int_fidel_raw"]

# Intervention implementation detail: highlighted text.
InterventionDetail_HT = highlighted_text(intervention_implementation_details)
InterventionDetail_HT_df = pd.DataFrame(InterventionDetail_HT).T
InterventionDetail_HT_df.columns = ["int_fidel_ht"]

# Intervention implementation detail: user comments.
InterventionDetail_Comments = comments(intervention_implementation_details)
InterventionDetail_Comments_df = pd.DataFrame(InterventionDetail_Comments).T
InterventionDetail_Comments_df.columns = ["int_fidel_info"]

# concatenate data frames
intervention_detail_df = pd.concat([
    InterventionDetail_df, InterventionDetail_HT_df,
    InterventionDetail_Comments_df
],
                                   axis=1,

示例#9

0

显示文件

文件： Comparability.py 项目： JonathanReardon/ToolkitExtraction

from Main import get_data, highlighted_text, comments
from AttributeIDList import comparability_output
import pandas as pd

# Comparability: raw extraction values, transposed and named.
comparability = get_data(comparability_output)
comparability_df = pd.DataFrame(comparability).T
comparability_df.columns = ["comp_anal_raw"]

# Comparability: highlighted text.
comparability_HT = highlighted_text(comparability_output)
comparability_HT_df = pd.DataFrame(comparability_HT).T
comparability_HT_df.columns = ["comp_anal_ht"]

# Comparability: user comments.
comparability_Comments = comments(comparability_output)
comparability_Comments_df = pd.DataFrame(comparability_Comments).T
comparability_Comments_df.columns = ["comp_anal_info"]

# Rebind comparability_df to the column-wise concatenation of all three.
comparability_df = pd.concat(
    [comparability_df, comparability_HT_df, comparability_Comments_df],
    sort=False,
    axis=1)

# Replace missing values with the literal string "NA", in place.
comparability_df.fillna("NA", inplace=True)

示例#10

0

显示文件

from Main import get_data, highlighted_text, comments
from AttributeIDList import student_gender
import pandas as pd

# Gender: raw extraction values, transposed and named.
gender = get_data(student_gender)
gender_df = pd.DataFrame(gender).T
gender_df.columns = ["part_gen_raw"]
""" gender_df["part_gen_female_only"] = gender_df["part_gen_raw"].map(set(['Female only']).issubset).astype(int)
gender_df["part_gen_male_only"] = gender_df["part_gen_raw"].map(set(['Male only']).issubset).astype(int)
gender_df["part_gen_mixed_gender"] = gender_df["part_gen_raw"].map(set(['Mixed gender']).issubset).astype(int)
gender_df["part_gen_no_info_provided"] = gender_df["part_gen_raw"].map(set(['No information provided']).issubset).astype(int) """

# Gender: highlighted text.
gender_HT = highlighted_text(student_gender)
gender_HT_df = pd.DataFrame(gender_HT).T
gender_HT_df.columns = ["part_gen_ht"]

# Gender: user comments.
gender_Comments = comments(student_gender)
gender_Comments_df = pd.DataFrame(gender_Comments).T
gender_Comments_df.columns = ["part_gen_info"]

# Rebind gender_df to the column-wise concatenation of all three frames.
gender_df = pd.concat(
    [gender_df, gender_HT_df, gender_Comments_df],
    sort=False,
    axis=1)

示例#11

0

显示文件

# Further SES/FSM info: user comments, transposed and named.
further_ses_info_Comments_df = pd.DataFrame(further_ses_info_Comments).T
further_ses_info_Comments_df.columns = ["fsm_info_info"]

# Further SES/FSM info: highlighted text.
further_ses_fsm_info_HT = highlighted_text(further_ses_fsm_info_output)
further_ses_fsm_info_HT_df = pd.DataFrame(further_ses_fsm_info_HT).T
further_ses_fsm_info_HT_df.columns = ["fsm_info_ht"]

# -------------------------------------
#  NO LOW SES/FSM INFORMATION PROVIDED
# -------------------------------------

# No low SES/FSM info: raw extraction values, transposed and named.
no_low_ses_fsm_info = get_data(no_ses_fsm_info_provided_output)
no_low_ses_fsm_info_df = pd.DataFrame(no_low_ses_fsm_info).T
no_low_ses_fsm_info_df.columns = ["fsm_na_raw"]

# No low SES/FSM info: user comments.
no_low_ses_fsm_info_comments = comments(no_ses_fsm_info_provided_output)
no_low_ses_fsm_info_comments_df = pd.DataFrame(
    no_low_ses_fsm_info_comments).T
no_low_ses_fsm_info_comments_df.columns = ["fsm_na_info"]
""" no_low_ses_fsm_info_df["No_SES_FSM_Info"]=no_low_ses_fsm_info_df["No_SES_FSM_Info_Provided"].map(set(['No SES/FSM Information Provided']).issubset).astype(int) """

# concatenate dataframes
ses_fsm_df = pd.concat([
    low_ses_proportion_Comments_df, low_ses_proportion_HT_df,
    low_ses_percentage_Comments_df, low_ses_percentage_HT_df,

示例#12

0

显示文件

文件： StudyRealism.py 项目： JonathanReardon/ToolkitExtraction

from Main import load_json, get_data, comments, highlighted_text
from CODES import study_realism_output
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# Study realism: raw extraction values, transposed and named.
studyrealism = get_data(study_realism_output)
studyrealism_df = pd.DataFrame(studyrealism).T
studyrealism_df.columns = ["eco_valid_raw"]

# Study realism: highlighted text.
studyrealism_HT = highlighted_text(study_realism_output)
studyrealism_HT_df = pd.DataFrame(studyrealism_HT).T
studyrealism_HT_df.columns = ["eco_valid_ht"]

# Study realism: user comments.
studyrealism_Comments = comments(study_realism_output)
studyrealism_Comments_df = pd.DataFrame(studyrealism_Comments).T
studyrealism_Comments_df.columns = ["eco_valid_info"]

# Place the three frames side by side (column-wise concatenation).
study_realism_df = pd.concat(
    [studyrealism_df, studyrealism_HT_df, studyrealism_Comments_df],
    sort=False,
    axis=1)

示例#13

0

显示文件

from Main import get_data, comments
from AttributeIDList import admin_strand_output, admin_strand_secondary
import pandas as pd

# Admin strand: raw extraction values, transposed and named.
admin_strand = get_data(admin_strand_output)
adminstrand_df = pd.DataFrame(admin_strand).T
adminstrand_df.columns = ["strand_raw"]

# Admin strand update data (marked [temp] by the original author).
# NOTE: this frame is built here but is not part of the concatenation below.
admin_strand_other = get_data(admin_strand_secondary)
adminstrand_secondary_df = pd.DataFrame(admin_strand_other).T
adminstrand_secondary_df.columns = ["SGT_Update_2020"]

# Admin strand: user comments.
admin_strand_comments = comments(admin_strand_output)
admin_strand_comments_df = pd.DataFrame(admin_strand_comments).T
admin_strand_comments_df.columns = ["strand_info"]

# Place the raw and comment frames side by side.
admin_strand_df = pd.concat(
    [adminstrand_df, admin_strand_comments_df],
    sort=False,
    axis=1)

# Replace carriage returns, newlines and colons with a space, in place,
# applying the patterns one after another in that order.
for _pattern in ('\r', '\n', ':'):
    admin_strand_df.replace(_pattern, ' ', regex=True, inplace=True)

示例#14

0

显示文件

from Main import get_data, highlighted_text, comments
from AttributeIDList import comparabiltiy_vars_reported
from AttributeIDList import if_yes_which_comparability_variables_reported_output
import pandas as pd

# ---------------------------------------------------
# Are the variables used for comparability reported?
# ---------------------------------------------------

# Comparability variables reported: raw values, transposed and named.
comparability_vars_reported = get_data(comparabiltiy_vars_reported)
comparability_vars_reported_df = pd.DataFrame(comparability_vars_reported).T
comparability_vars_reported_df.columns = ["comp_var_rep_raw"]

# Comparability variables reported: highlighted text.
comparability_vars_reported_HT = highlighted_text(comparabiltiy_vars_reported)
comparability_vars_reported_HT_df = pd.DataFrame(
    comparability_vars_reported_HT).T
comparability_vars_reported_HT_df.columns = ["comp_var_rep_ht"]

# Comparability variables reported: user comments.
comparability_vars_reported_Comments = comments(comparabiltiy_vars_reported)
comparability_vars_reported_Comments_df = pd.DataFrame(
    comparability_vars_reported_Comments).T
comparability_vars_reported_Comments_df.columns = ["comp_var_rep_info"]

######################################################
# If yes, which variables are used for comparability?

示例#15

0

显示文件

from Main import get_data, highlighted_text, comments
from AttributeIDList import curriculum_subjects
from AttributeIDList import other_outcomes_output
from AttributeIDList import which_other_outcomes_output
from AttributeIDList import other_participants_output
import pandas as pd

# Curriculum subjects: raw extraction values, transposed and named.
curriculumsubjects = get_data(curriculum_subjects)
curriculumsubjects_df = pd.DataFrame(curriculumsubjects).T
curriculumsubjects_df.columns = ["test_subject_raw"]

# binarize curriculum subject options
''' curriculumsubjects_df["test_subject_literacy_(first_language)"] = curriculumsubjects_df["test_subject_raw"].map(set(['Literacy (first language)']).issubset).astype(int)
curriculumsubjects_df["test_subject_reading_Comprehension"] = curriculumsubjects_df["test_subject_raw"].map(set(['Reading comprehension']).issubset).astype(int)
curriculumsubjects_df["test_subject_decoding/phonics"] = curriculumsubjects_df["test_subject_raw"].map(set(['Decoding/phonics']).issubset).astype(int)
curriculumsubjects_df["test_subject_spelling"] = curriculumsubjects_df["test_subject_raw"].map(set(['Spelling']).issubset).astype(int)
curriculumsubjects_df["test_subject_reading_other"] = curriculumsubjects_df["test_subject_raw"].map(set(['Reading other']).issubset).astype(int)
curriculumsubjects_df["test_subject_speaking_and_listening/oral_language"] = curriculumsubjects_df["test_subject_raw"].map(set(['Speaking and listening/Oral language']).issubset).astype(int)
curriculumsubjects_df["test_subject_writing"] = curriculumsubjects_df["test_subject_raw"].map(set(['Writing']).issubset).astype(int)
curriculumsubjects_df["test_subject_mathematics"] = curriculumsubjects_df["test_subject_raw"].map(set(['Mathematics']).issubset).astype(int)
curriculumsubjects_df["test_subject_science"] = curriculumsubjects_df["test_subject_raw"].map(set(['Science']).issubset).astype(int)
curriculumsubjects_df["test_subject_social_studies"] = curriculumsubjects_df["test_subject_raw"].map(set(['Social studies']).issubset).astype(int)
curriculumsubjects_df["test_subject_arts"] = curriculumsubjects_df["test_subject_raw"].map(set(['Arts']).issubset).astype(int)
curriculumsubjects_df["test_subject_languages"] = curriculumsubjects_df["test_subject_raw"].map(set(['Languages']).issubset).astype(int)
curriculumsubjects_df["test_subject_other_curriculum_test"] = curriculumsubjects_df["test_subject_raw"].map(set(['Other curriculum test']).issubset).astype(int) '''

# Get curriculum subjects highlighted text and wrap it in a DataFrame
curriculumsubjects_HT = highlighted_text(curriculum_subjects)
curriculumsubjects_HT_df = pd.DataFrame(curriculumsubjects_HT)

示例#16

0

显示文件

from Main import load_json, get_data, comments, highlighted_text
from CODES import intervention_evaluation
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# Intervention evaluation: raw extraction values, transposed and named.
InterventionEvaluation = get_data(intervention_evaluation)
InterventionEvaluation_df = pd.DataFrame(InterventionEvaluation).T
InterventionEvaluation_df.columns = ["out_eval_raw"]

# Derive "eef_eval_raw": flag rows whose raw value contains the label
# "Is this an EEF evaluation?" as 1/0, then map 0 -> "No" and 1 -> "Yes".
InterventionEvaluation_df["eef_eval_raw"] = (
    InterventionEvaluation_df["out_eval_raw"]
    .map({"Is this an EEF evaluation?"}.issubset)
    .astype(int)
    .replace(to_replace=[0, 1], value=["No", "Yes"]))

# Intervention evaluation: highlighted text.
InterventionEvaluation_HT = highlighted_text(intervention_evaluation)
InterventionEvaluation_HT_df = pd.DataFrame(InterventionEvaluation_HT).T
InterventionEvaluation_HT_df.columns = ["out_eval_ht"]

# Intervention evaluation: user comments.
InterventionEvaluation_Comments = comments(intervention_evaluation)
InterventionEvaluation_Comments_df = pd.DataFrame(
    InterventionEvaluation_Comments).T
InterventionEvaluation_Comments_df.columns = ["out_eval_info"]

示例#17

0

显示文件

文件： NumberofClasses.py 项目： JonathanReardon/ToolkitExtraction

    number_of_classes_total_Comments)
# Total number of classes: comments frame (built above), transposed and named.
number_of_classes_total_Comments_df = (
    number_of_classes_total_Comments_df.T)
number_of_classes_total_Comments_df.columns = ["class_total_info"]

# Total number of classes: highlighted text.
number_of_classes_total_HT = highlighted_text(number_of_classes_total_output)
number_of_classes_total_HT_df = pd.DataFrame(number_of_classes_total_HT).T
number_of_classes_total_HT_df.columns = ["class_total_ht"]

# -------------------------------------------------------
#  NUMBER OF CLASSES NOT PROVIDED/UNCLEAR/NOT APPLICABLE
# -------------------------------------------------------

# Number of classes not provided: raw values, transposed and named.
number_of_classes_np = get_data(number_of_classes_not_provided_output)
number_of_classes_np_df = pd.DataFrame(number_of_classes_np).T
number_of_classes_np_df.columns = ["class_na_raw"]

# Number of classes not provided: user comments.
number_of_classes_not_provided_Comments = comments(
    number_of_classes_not_provided_output)
number_of_classes_not_provided_Comments_df = pd.DataFrame(
    number_of_classes_not_provided_Comments).T
number_of_classes_not_provided_Comments_df.columns = ["class_na_info"]

# Number of classes not provided: highlighted text.
number_of_classes_not_provided_HT = highlighted_text(
    number_of_classes_not_provided_output)

示例#18

0

显示文件

文件： Age.py 项目： JonathanReardon/ToolkitExtraction

from Main import get_data, highlighted_text, comments
from AttributeIDList import student_age_output
import pandas as pd

# Student age: raw extraction values, transposed and named.
student_age = get_data(student_age_output)
student_age_df = pd.DataFrame(student_age).T
student_age_df.columns = ["part_age_raw"]
""" student_age_df["part_age_3"]=student_age_df["part_age_raw"].map(set(['3']).issubset).astype(int)
student_age_df["part_age_4"]=student_age_df["part_age_raw"].map(set(['4']).issubset).astype(int)
student_age_df["part_age_5"]=student_age_df["part_age_raw"].map(set(['5']).issubset).astype(int)
student_age_df["part_age_6"]=student_age_df["part_age_raw"].map(set(['6']).issubset).astype(int)
student_age_df["part_age_7"]=student_age_df["part_age_raw"].map(set(['7']).issubset).astype(int)
student_age_df["part_age_8"]=student_age_df["part_age_raw"].map(set(['8']).issubset).astype(int)
student_age_df["part_age_9"]=student_age_df["part_age_raw"].map(set(['9']).issubset).astype(int)
student_age_df["part_age_10"]=student_age_df["part_age_raw"].map(set(['10']).issubset).astype(int)
student_age_df["part_age_11"]=student_age_df["part_age_raw"].map(set(['11']).issubset).astype(int)
student_age_df["part_age_12"]=student_age_df["part_age_raw"].map(set(['12']).issubset).astype(int)
student_age_df["part_age_13"]=student_age_df["part_age_raw"].map(set(['13']).issubset).astype(int)
student_age_df["part_age_14"]=student_age_df["part_age_raw"].map(set(['14']).issubset).astype(int)
student_age_df["part_age_15"]=student_age_df["part_age_raw"].map(set(['15']).issubset).astype(int)
student_age_df["part_age_16"]=student_age_df["part_age_raw"].map(set(['16']).issubset).astype(int)
student_age_df["part_age_17"]=student_age_df["part_age_raw"].map(set(['17']).issubset).astype(int)
student_age_df["part_age_18"]=student_age_df["part_age_raw"].map(set(['18']).issubset).astype(int)

student_age_df["part_age_no_information_provided"]=student_age_df["part_age_raw"].map(set(['No information provided']).issubset).astype(int) """

# Get student age highlighted text and wrap it in a DataFrame
# (no transpose or column rename is applied within these lines).
student_age_HT = highlighted_text(student_age_output)
student_age_HT_df = pd.DataFrame(student_age_HT)

示例#19

0

显示文件

文件： PrimaryOutcomeDescStatsControlGroup_TWO.py 项目： JonathanReardon/ToolkitExtraction

# Control group (two) other information: highlighted text frame.
ControlGroupOtherInfo_HT_df = pd.DataFrame(ControlGroupOtherInfo_HT).T
ControlGroupOtherInfo_HT_df.columns = ["out_c2_other_ht"]

# Control group (two) other information: user comments.
ControlGroupOtherInfo_comments = comments(control_group_two_any_other_info)
ControlGroupOtherInfo_comments_df = pd.DataFrame(
    ControlGroupOtherInfo_comments).T
ControlGroupOtherInfo_comments_df.columns = ["out_c2_other_info"]

# ----------------
# Follow up data?
# ----------------

# Follow-up data: raw extraction values, transposed and named.
followupdata = get_data(follow_up_data_reported)
followupdata_df = pd.DataFrame(followupdata).T
followupdata_df.columns = ["follow_up_raw"]

# Follow-up data: highlighted text.
followupdata_HT = highlighted_text(follow_up_data_reported)
followupdata_HT_df = pd.DataFrame(followupdata_HT).T
followupdata_HT_df.columns = ["follow_up_ht"]

# Follow-up data: user comments.
followupdata_comments = comments(follow_up_data_reported)
followupdata_comments_df = pd.DataFrame(followupdata_comments).T
followupdata_comments_df.columns = ["follow_up_info"]

示例#20

0

显示文件

文件： NumberofSchools.py 项目： JonathanReardon/ToolkitExtraction

    number_of_schools_total_Comments)
# Total number of schools: comments frame (built above), transposed and named.
number_of_schools_total_Comments_df = (
    number_of_schools_total_Comments_df.T)
number_of_schools_total_Comments_df.columns = ["school_total_info"]

# Total number of schools: highlighted text.
number_of_schools_total_HT = highlighted_text(number_of_schools_total_output)
number_of_schools_total_HT_df = pd.DataFrame(number_of_schools_total_HT).T
number_of_schools_total_HT_df.columns = ["school_total_ht"]

# -------------------------------------------------------
#  NUMBER OF SCHOOLS NOT PROVIDED/UNCLEAR/NOT APPLICABLE
# -------------------------------------------------------

# Number of schools not provided: raw values, transposed and named.
number_of_schools_np = get_data(number_of_schools_not_provided_output)
number_of_schools_np_df = pd.DataFrame(number_of_schools_np).T
number_of_schools_np_df.columns = ["school_na_raw"]

# Number of schools not provided: user comments.
number_of_schools_np_Comments = comments(number_of_schools_not_provided_output)
number_of_schools_np_Comments_df = pd.DataFrame(
    number_of_schools_np_Comments).T
number_of_schools_np_Comments_df.columns = ["school_na_info"]

# Number of schools not provided: highlighted text, transposed.
number_of_schools_np_HT = highlighted_text(
    number_of_schools_not_provided_output)
number_of_schools_np_HT_df = pd.DataFrame(number_of_schools_np_HT).T

示例#21

0

显示文件

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_approach_digital_technology
from AttributeIDList import intervention_approach_parents_or_community_volunteers
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# -----------------------------------------
# DIGITAL TECHNOLOGY INTERVENTION INCLUSION
# -----------------------------------------

# Digital technology (inclusion): raw values, transposed and named.
DigitalTechnology = get_data(intervention_approach_digital_technology)
DigitalTechnology_df = pd.DataFrame(DigitalTechnology).T
DigitalTechnology_df.columns = ["digit_tech_raw"]

# Digital technology (inclusion): highlighted text.
DigitalTechnology_HT = highlighted_text(
    intervention_approach_digital_technology)
DigitalTechnology_HT_df = pd.DataFrame(DigitalTechnology_HT).T
DigitalTechnology_HT_df.columns = ["digit_tech_ht"]

# Digital technology (inclusion): user comments.
DigitalTechnology_Comments = comments(intervention_approach_digital_technology)
DigitalTechnology_Comments_df = pd.DataFrame(DigitalTechnology_Comments).T
DigitalTechnology_Comments_df.columns = ["digit_tech_info"]

示例#22

0

显示文件

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_delivery_output
import pandas as pd

# Load the JSON source (Main.load_json) before extracting anything.
load_json()

# Intervention delivery: raw extraction values, transposed and named.
InterventionDelivery = get_data(intervention_delivery_output)
interventiondelivery_df = pd.DataFrame(InterventionDelivery).T
interventiondelivery_df.columns = ["int_who_raw"]

# Intervention delivery: highlighted text.
InterventionDelivery_HT = highlighted_text(intervention_delivery_output)
InterventionDelivery_HT_df = pd.DataFrame(InterventionDelivery_HT).T
InterventionDelivery_HT_df.columns = ["int_who_ht"]

# Intervention delivery: user comments.
InterventionDelivery_Comments = comments(intervention_delivery_output)
InterventionDelivery_Comments_df = pd.DataFrame(
    InterventionDelivery_Comments).T
InterventionDelivery_Comments_df.columns = ["int_who_info"]

# Place the three frames side by side (column-wise concatenation).
intervention_delivery_df = pd.concat(
    [interventiondelivery_df,
     InterventionDelivery_HT_df,
     InterventionDelivery_Comments_df],
    sort=False,
    axis=1)

示例#23

0

显示文件

文件： Baseline_Differences.py 项目： JonathanReardon/ToolkitExtraction

from Main import get_data, highlighted_text, comments
from AttributeIDList import baseline_differences_output
import pandas as pd

# baseline differences: main data, transposed into one labelled column
baselinedifferences = get_data(baseline_differences_output)
baselinedifferences_df = pd.DataFrame(baselinedifferences).T
baselinedifferences_df.columns = ["base_diff_raw"]

# baseline differences: highlighted text
baselinedifferences_HT = highlighted_text(baseline_differences_output)
baselinedifferences_HT_df = pd.DataFrame(baselinedifferences_HT).T
baselinedifferences_HT_df.columns = ["base_diff_ht"]

# baseline differences: user comments
baselinedifferences_Comments = comments(baseline_differences_output)
baselinedifferences_Comments_df = pd.DataFrame(baselinedifferences_Comments).T
baselinedifferences_Comments_df.columns = ["base_diff_info"]

# place the three single-column frames side by side (raw, ht, info)
baseline_differences_df = pd.concat(
    [
        baselinedifferences_df,
        baselinedifferences_HT_df,
        baselinedifferences_Comments_df,
    ],
    axis=1,
    sort=False,
)

# replace missing cells with the literal string "NA", modifying the frame in place
baseline_differences_df.fillna("NA", inplace=True)

示例#24

0

显示文件

文件： Sample.py 项目： JonathanReardon/ToolkitExtraction

# The helpers used below were never imported, so the script failed with
# NameError on its first statement.
# NOTE(review): assumed to live in Main like the helpers imported by the
# sibling scripts - confirm get_outcome_lvl2 is exported there.
from Main import load_json, get_data, get_outcome_lvl2
from AttributeIDList import sample_output
import pandas as pd

# load json file
load_json()

# get sample data (used as returned; this frame is not transposed)
sample = get_outcome_lvl2(sample_output)
sample_df = pd.DataFrame(sample)

# name each column (number depends on outcome number); each existing column
# label is incremented by one, so labels must be numeric
# (assumes pandas' default 0-based integer labels - TODO confirm)
sample_df.columns = [
    "out_samp_{}".format(column + 1) for column in sample_df.columns
]

# get sample main check data, transposed into a single labelled column
sample_main_check = get_data(sample_output)
sample_main_check_df = pd.DataFrame(sample_main_check)
sample_main_check_df = sample_main_check_df.T
sample_main_check_df.columns = ["main_check"]

# concatenate dataframes side by side
all_variables = pd.concat([sample_df, sample_main_check_df],
                          axis=1,
                          sort=False)

# fill blanks with NA
all_variables.fillna("NA", inplace=True)

# save to disk (disabled; uncomment to write the CSV)
# all_variables.to_csv("Sample.csv", index=False)

示例#25

0

显示文件

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_costs_reported
import pandas as pd

# the JSON source must be loaded before any extraction helper is called
load_json()

# intervention costs reported: main data, transposed into one labelled column
InterventionCosts = get_data(intervention_costs_reported)
InterventionCosts_df = pd.DataFrame(InterventionCosts).T
InterventionCosts_df.columns = ["int_cost_raw"]

# intervention costs reported: highlighted text
InterventionCosts_HT = highlighted_text(intervention_costs_reported)
InterventionCosts_HT_df = pd.DataFrame(InterventionCosts_HT).T
InterventionCosts_HT_df.columns = ["int_cost_ht"]

# intervention costs reported: user comments
InterventionCosts_Comments = comments(intervention_costs_reported)
InterventionCosts_Comments_df = pd.DataFrame(InterventionCosts_Comments).T
InterventionCosts_Comments_df.columns = ["int_cost_info"]

# concatenate data frames
intervention_costs_df = pd.concat([
    InterventionCosts_df, InterventionCosts_HT_df,
    InterventionCosts_Comments_df
],
                                  axis=1,

示例#26

0

显示文件

文件： InterventionTime.py 项目： JonathanReardon/ToolkitExtraction

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_time_output
import pandas as pd

# the JSON source must be loaded before any extraction helper is called
load_json()

###################
# INTERVENTION TIME
###################

# intervention time: main data, transposed into one labelled column
InterventionTime = get_data(intervention_time_output)
InterventionTime_df = pd.DataFrame(InterventionTime).T
InterventionTime_df.columns = ["int_when_raw"]

# intervention time: highlighted text
InterventionTime_HT = highlighted_text(intervention_time_output)
InterventionTime_HT_df = pd.DataFrame(InterventionTime_HT).T
InterventionTime_HT_df.columns = ["int_when_ht"]

# intervention time: user comments
InterventionTime_Comments = comments(intervention_time_output)
InterventionTime_Comments_df = pd.DataFrame(InterventionTime_Comments).T
InterventionTime_Comments_df.columns = ["int_when_info"]

# concatenate data frames
intervention_time_df = pd.concat([

示例#27

0

显示文件

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_teaching_approach
import pandas as pd

# the JSON source must be loaded before any extraction helper is called
load_json()

# teaching approach: main data, transposed into one labelled column
InterventionTeachingApproach = get_data(intervention_teaching_approach)
InterventionTeachingApproach_df = pd.DataFrame(InterventionTeachingApproach).T
InterventionTeachingApproach_df.columns = ["int_approach_raw"]

# teaching approach: highlighted text
InterventionTeachingApproach_HT = highlighted_text(
    intervention_teaching_approach
)
InterventionTeachingApproach_HT_df = pd.DataFrame(
    InterventionTeachingApproach_HT
).T
InterventionTeachingApproach_HT_df.columns = ["int_approach_ht"]

# teaching approach: user comments
InterventionTeachingApproach_Comments = comments(intervention_teaching_approach)
InterventionTeachingApproach_Comments_df = pd.DataFrame(
    InterventionTeachingApproach_Comments
).T
InterventionTeachingApproach_Comments_df.columns = ["int_approach_info"]

# place the three single-column frames side by side (raw, ht, info)
intervention_teaching_approach_df = pd.concat(
    [
        InterventionTeachingApproach_df,
        InterventionTeachingApproach_HT_df,
        InterventionTeachingApproach_Comments_df,
    ],
    axis=1,
    sort=False,
)

示例#28

0

显示文件

文件： Clustering.py 项目： JonathanReardon/ToolkitExtraction

from Main import get_data, highlighted_text, comments
from AttributeIDList import clustering_output
import pandas as pd

# clustering: main data, transposed into one labelled column
clustering = get_data(clustering_output)
clustering_df = pd.DataFrame(clustering).T
clustering_df.columns = ["clust_anal_raw"]

# clustering: highlighted text
clustering_HT = highlighted_text(clustering_output)
clustering_HT_df = pd.DataFrame(clustering_HT).T
clustering_HT_df.columns = ["clust_anal_ht"]

# clustering: user comments
clustering_Comments = comments(clustering_output)
clustering_Comments_df = pd.DataFrame(clustering_Comments).T
clustering_Comments_df.columns = ["clust_anal_info"]

# place the three frames side by side; note that clustering_df is rebound
# from the raw-only frame to the combined frame here
clustering_df = pd.concat(
    [clustering_df, clustering_HT_df, clustering_Comments_df],
    axis=1, sort=False,
)

# replace missing cells with the literal string "NA", modifying the frame in place
clustering_df.fillna("NA", inplace=True)

示例#29

0

显示文件

from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import intervention_organisation_type_output
import pandas as pd

# the JSON source must be loaded before any extraction helper is called
load_json()

# organisation type: main data, transposed into one labelled column
InterventionOrgType = get_data(intervention_organisation_type_output)
InterventionOrgType_df = pd.DataFrame(InterventionOrgType).T
InterventionOrgType_df.columns = ["int_prov_raw"]

# organisation type: highlighted text
InterventionOrgType_HT = highlighted_text(intervention_organisation_type_output)
InterventionOrgType_HT_df = pd.DataFrame(InterventionOrgType_HT).T
InterventionOrgType_HT_df.columns = ["int_prov_ht"]

# organisation type: user comments
InterventionOrgType_Comments = comments(intervention_organisation_type_output)
InterventionOrgType_Comments_df = pd.DataFrame(InterventionOrgType_Comments).T
InterventionOrgType_Comments_df.columns = ["int_prov_info"]

# concatenate data frames
intervention_org_type = pd.concat([
    InterventionOrgType_df, InterventionOrgType_HT_df,
    InterventionOrgType_Comments_df
],

示例#30

0

显示文件

from Main import get_data, comments, highlighted_text
from AttributeIDList import attrition_dropout_reported_output
from AttributeIDList import treatment_group_attrition
from AttributeIDList import overall_percent_attrition
import pandas as pd

###############################
# ATTRITION DROP OUT REPORTED #
###############################

# main data, transposed into one labelled column
attrition_dropout_reported = get_data(attrition_dropout_reported_output)
attrition_dropout_reported_df = pd.DataFrame(attrition_dropout_reported).T
attrition_dropout_reported_df.columns = ["attri_raw"]

# highlighted text
attrition_dropout_reported_HT = highlighted_text(attrition_dropout_reported_output)
attrition_dropout_reported_HT_df = pd.DataFrame(attrition_dropout_reported_HT).T
attrition_dropout_reported_HT_df.columns = ["attri_ht"]

# user comments
attrition_dropout_reported_Comments = comments(attrition_dropout_reported_output)
attrition_dropout_reported_Comments_df = pd.DataFrame(
    attrition_dropout_reported_Comments
).T
attrition_dropout_reported_Comments_df.columns = ["attri_info"]