Example #1
def backupProject(api_url, api_key, date_dir, date_string, skip_files=False):
    """Create a backup of a REDCap project, including the """
    # Get project info.
    project_info = exportProjectInfo(api_url, api_key)
    if ("error" in project_info):
        print
        print "*************************************"
        print Color.red + "ERROR:" + Color.end
        print project_info["error"]
        print "Project may have failed to backup."
        print "*************************************"
        print
        return
    project_id = str(project_info["project_id"])
    project_title_nospace = project_info["project_title"].replace(" ", "_")

    print "Backing up: " + Color.green + project_info[
        "project_title"] + Color.end + " (project ID: " + Color.green + project_id + Color.end + ")"

    # Create directory for project.
    project_dir_name = project_id + "_" + project_title_nospace
    project_dir = os.path.join(date_dir, project_dir_name)
    if (not os.path.isdir(project_dir)):
        os.mkdir(project_dir)

    # Define suffix to append to each file.
    file_suffix = "_" + date_string + "_" + project_dir_name

    # Backup project XML (not including records).
    project_xml = exportProjectXML(api_url, api_key)
    project_xml_path = os.path.join(project_dir,
                                    "project_xml" + file_suffix + ".xml")
    with open(project_xml_path, 'w') as fh:
        fh.write(project_xml)

    # Backup project records.
    records = exportRecords(api_url,
                            api_key,
                            format="csv",
                            export_form_completion=True)
    records_path = os.path.join(project_dir, "records" + file_suffix + ".csv")
    with open(records_path, 'w') as fh:
        fh.write(records)

    # Backup files stored in File Upload fields. This does not include files stored in the File Repository.
    if (not skip_files):
        project_files_dir = os.path.join(project_dir,
                                         'files_in_file_upload_fields')
        exportFiles(api_url, api_key, project_files_dir, flat=False)
    print

    return
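
A minimal usage sketch for backupProject; the URL, token, and backup path below are hypothetical placeholders, and the helpers above are assumed to be importable:

import os
import datetime

api_url = "https://redcap.example.org/api/"   # hypothetical URL
api_key = "0123456789ABCDEF0123456789ABCDEF"  # hypothetical token
date_string = datetime.datetime.today().strftime('%Y-%m-%d')
date_dir = os.path.join("/backups/redcap", date_string)  # hypothetical path
if (not os.path.isdir(date_dir)):
    os.makedirs(date_dir)
backupProject(api_url, api_key, date_dir, date_string, skip_files=True)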
Example #2
def importRecords(
    api_url,
    api_key,
    records_src,
    overwrite='normal',
    format='json',
    quick=False,
    quiet=False,
    return_content='ids',
    size_thres=20000
):  # size_thres = 300000 has not caused error [2019-05-08 ACTUALLY, MAYBE IT HAS]
    # Load project.
    project = redcap.Project(api_url, api_key)
    project_info = exportProjectInfo(api_url, api_key)

    if (not quiet):
        # Ask for user confirmation before proceeding.
        print "Data will be imported to the following project:"
        print "-------------------------------------------------------------------------------------------------"
        print "Project Title: " + Color.blue + project_info[
            "project_title"] + Color.end
        print "Project ID   : " + Color.blue + str(
            project_info["project_id"]) + Color.end
        print "-------------------------------------------------------------------------------------------------"
        #    cont = bool(raw_input("Please verify that this is project you wish to modify. Continue y/[n]? ") == 'y')
        cont = bool(raw_input("Continue y/[n]? ") == 'y')
        if (not cont):
            print "Quitting"
            sys.exit()
        if (overwrite == "overwrite"):
            cont = bool(
                raw_input(
                    "You have selected to overwrite fields with blanks. Continue y/[n]? "
                ) == 'y')
            if (not cont):
                print "Quitting"
                sys.exit()

    # If records_src is a CSV string, convert it to a list of dicts. Then proceed using the same method as for format='json'.
    if (format == 'csv'):
        reader = csv.DictReader(StringIO.StringIO(records_src))
        records_src = []
        #        print 'WARNING: Not converting import data to unicode before PyCap import_records() call'
        for line in reader:  # each row is a dict (Python < 3.6) or an OrderedDict (Python >= 3.6). Convert each row to dict for consistency.
            line_dict = {
                key.decode('utf-8'): value.decode('utf-8')
                for key, value in line.iteritems()
            }  # doesn't seem necessary to convert the keys and values to unicode; seems to work the same either way.
            #            line_dict = dict(line)
            records_src.append(line_dict)

    # Compare source and destination data. Continue if user confirms changes.
    if (not quick):
        records_src_copy = copy.deepcopy(records_src)
        if (not verifyChanges(api_url, api_key, records_src, project.def_field,
                              project_info, overwrite)):
            print "Quitting"
            sys.exit()

    # Determine the size of the imported data. The right way to measure "size", and the actual size limit, are unknown; if imports fail with the current setting, reduce size_thres.
    num_row = len(records_src)
    num_col = len(records_src[0])
    num_cells = num_row * num_col

    failure_msg = "Import appears to have failed, likely because the input data is too large. Review the logging information in REDCap online to verify import failure, and change data chunk size by adjusting 'size_thres'."

    # Import data
    if (num_cells < size_thres):
        if (not quiet):
            print "Importing data in one piece"
        #        return_info = project.import_records(records_src, overwrite=overwrite, format=format, return_content=return_content)

        #        print "importing records as type:", type(records_src), "(", type(records_src[0]), "(", type(records_src[0][records_src[0].keys()[0]]), " ) )"

        return_info = project.import_records(
            records_src,
            overwrite=overwrite,
            format='json',
            return_content=return_content
        )  # If format was 'csv', records_src has been converted to 'json' by this point.

        # Print information returned from REDCap.
        if (return_content == 'count'):
            try:
                num_modified = return_info["count"]
                if (not quiet):
                    print "Number of records imported: " + str(num_modified)
            except KeyError:
                print failure_msg
                sys.exit()
        elif (return_content == 'ids'):
            if (return_info != {}):
                if (not quiet):
                    print "Number of records imported: " + str(
                        len(return_info))
                    print "IDs of records imported:"
                    id_string = ""
                    for id in return_info:
                        id_string += id + " "
                    id_string = id_string.rstrip()
                    print id_string
            else:
                print failure_msg
                sys.exit()
    else:
        row_chunk_size = size_thres / num_col  # Python 2 integer division floors the result (desired here)
        if (not quiet):
            print "Importing records " + str(
                row_chunk_size) + " rows at a time"

        if (return_content == 'count'):
            num_modified = 0
        elif (return_content == 'ids'):
            ids_imported = []

        # Slice the data into chunks of size <= size_thres
        num_chunks = int(math.ceil(float(num_row) / float(row_chunk_size)))
        for chunk_index in range(num_chunks):
            # records_src is always a list of dicts at this point, so slice it directly.
            chunk = records_src[chunk_index *
                                row_chunk_size:(chunk_index + 1) *
                                row_chunk_size]
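            # Worked example of the slice above: with num_row=250, num_col=100,
            # and size_thres=20000, row_chunk_size = 20000/100 = 200 and
            # num_chunks = ceil(250/200) = 2, yielding rows [0:200] and [200:250].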

            #            print "importing records as type:", type(records_src), "(", type(records_src[0]), "(", type(records_src[0][records_src[0].keys()[0]]), " ) )"

            # Import chunk.
            #            return_info = project.import_records(chunk, overwrite=overwrite, format=format, return_content=return_content)
            return_info = project.import_records(
                chunk,
                overwrite=overwrite,
                format='json',
                return_content=return_content
            )  # if format was CSV, the CSV string has already been converted to 'json' format.

            # Combine import results for each chunk.
            if (return_content == 'count'):
                try:
                    num_modified += return_info["count"]
                except KeyError:
                    print failure_msg
                    sys.exit()
            elif (return_content == 'ids'):
                if (return_info != {}):
                    ids_imported.extend(return_info)
                else:
                    print chunk
                    print return_info
                    print failure_msg
                    sys.exit()

            if (not quiet):
                completion_percentage = float(chunk_index +
                                              1) / float(num_chunks) * 100.
                sys.stdout.write('\r')
                sys.stdout.write('%.2f%% complete' % (completion_percentage, ))
                sys.stdout.flush()
        if (not quiet):
            sys.stdout.write('\n\r')
            #            sys.stdout.write('%.2f%% complete' % (float(100),))
            sys.stdout.flush()

        # Report import results.
        if (not quiet):
            if (return_content == 'count'):
                print "Number of records imported: " + str(num_modified)
            elif (return_content == 'ids'):
                id_string = ""
                for id in ids_imported:
                    id_string += id + " "
                id_string = id_string.rstrip()
                print "Number of records imported: " + str(len(ids_imported))
                print "IDs of records imported:"
                print id_string
    return
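
A hedged usage sketch for importRecords; the records and field names are hypothetical, and quick=True/quiet=True skip the comparison and interactive confirmation steps:

api_url = "https://redcap.example.org/api/"   # hypothetical URL
api_key = "0123456789ABCDEF0123456789ABCDEF"  # hypothetical token
records = [
    {u'record_id': u'1', u'age': u'34'},      # hypothetical field names
    {u'record_id': u'2', u'age': u'41'},
]
importRecords(api_url, api_key, records,
              format='json', return_content='count',
              quick=True, quiet=True)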
Example #3
def setFormCompleteBlanks(api_url,
                          api_key,
                          out_dir=None,
                          quick_import=False,
                          instruments=None,
                          statuses=["0"]):
    """Find all empty forms in a project, and set their form_complete variables to blanks.
Parameters
----------
    api_url : str
        API URL for REDCap project to be modified
    api_key : str
        API token for REDCap project to be modified
"""
    ## Export all records for project
    records = exportRecords(api_url,
                            api_key,
                            export_form_completion=True,
                            forms=instruments)

    # Convert records to a Pandas DataFrame
    records_df = pandas.DataFrame(records).astype(unicode)

    ## Find blank forms form by form
    # Get parsed metadata.
    project = redcap.Project(api_url, api_key)
    def_field = project.def_field
    project_info = exportProjectInfo(api_url, api_key)
    project_longitudinal = bool(project_info["is_longitudinal"])
    project_repeating = bool(
        project_info["has_repeating_instruments_or_events"])
    events = getEvents(api_url,
                       api_key)  #project, project_info, project_longitudinal)
    metadata_raw = project.export_metadata()
    form_event_mapping = exportFormEventMapping(project, project_longitudinal)
    repeating_forms_events = exportRepeatingFormsEvents(
        api_url, api_key, project_repeating)
    forms = exportFormsOrdered(api_url, api_key)
    form_repetition_map = createFormRepetitionMap(project_longitudinal,
                                                  project_repeating,
                                                  form_event_mapping,
                                                  repeating_forms_events,
                                                  forms)
    metadata = parseMetadata(def_field, project_info, project_longitudinal,
                             project_repeating, events, metadata_raw,
                             form_event_mapping, repeating_forms_events, forms,
                             form_repetition_map)

    ## Set which fields form the primary key
    primary_key = [def_field]
    if project_longitudinal:
        primary_key.append('redcap_event_name')
    if project_repeating:
        primary_key.extend(
            ['redcap_repeat_instrument', 'redcap_repeat_instance'])

    # If quick_import=True, still require a single confirmation on the first import to verify that the correct REDCap project was selected; quiet_import is set to True after that first import. If quick_import=False, all imports are made with quiet=False.
    quiet_import = False

    # Loop through forms and determine if they are empty
    for form in forms:
        form_name = form['instrument_name']
        # If a set of instruments was specified, check that the current instrument is in the list.
        if ((not instruments is None) and (not form_name in instruments)):
            continue

        form_complete_field = form_name + "_complete"

        # Generate list of fields in the form.
        fields_in_form_checkbox = []
        fields_in_form_noncheckbox = []
        for field_name, field_obj in metadata.iteritems():
            if (field_obj.form_name == form_name
                ):  # if field is in current form.
                if (field_obj.field_type == 'checkbox'):
                    fields_in_form_checkbox.append(field_name)
                else:
                    fields_in_form_noncheckbox.append(field_name)

        # For the current form, find all rows in which the form is completely empty and the form_complete field is in 'statuses'. (Unchecked checkbox options export as '0', so '' and '0' both count as empty for checkbox fields.)
        empty_form_rows = records_df.loc[(
            (records_df[fields_in_form_checkbox].isin(['', '0']).all(axis=1)) &
            (records_df[fields_in_form_noncheckbox].isin(['']).all(axis=1)) &
            (records_df[form_complete_field].isin(statuses))),
                                         primary_key + [form_complete_field]]

        ## Either save a report of the form_complete fields which should be set to blank, or import them directly.
        if (len(empty_form_rows) > 0):
            if (not out_dir is None):
                # Save reports to files if an output directory was specified.
                file_name = form_name + '.csv'
                out_path = os.path.join(out_dir, file_name)
                empty_form_rows.to_csv(out_path, index=False, encoding='utf-8')
            else:
                ## If an output directory was not specified, overwrite the form_complete '0's directly.
                # Set the '0's to blanks
                empty_form_rows.loc[:, form_complete_field] = ''

                # Convert the import data to CSV.
                empty_form_rows_csv = empty_form_rows.to_csv(index=False,
                                                             encoding='utf-8')

                # Import the data.
                importRecords(api_url,
                              api_key,
                              empty_form_rows_csv,
                              quick=quick_import,
                              quiet=quiet_import,
                              format='csv',
                              overwrite='overwrite',
                              return_content='count')
                if (quick_import and (not quiet_import)):
                    quiet_import = True
                    print "The remaining import steps will not require user confirmation, since quick_import=True."
    return
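
A usage sketch for setFormCompleteBlanks; the output directory and instrument name are hypothetical. Passing out_dir writes per-form CSV reports instead of importing the changes directly:

api_url = "https://redcap.example.org/api/"   # hypothetical URL
api_key = "0123456789ABCDEF0123456789ABCDEF"  # hypothetical token
setFormCompleteBlanks(api_url, api_key,
                      out_dir='/tmp/form_complete_reports',  # hypothetical path
                      instruments=['demographics'])          # hypothetical form name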
Example #4
def backupProjects(api_key_path,
                   out_dir,
                   code_name_list=None,
                   modification_notes=None,
                   timestamp=False,
                   skip_files=False):
    # Check that output directory exists.
    if (not os.path.isdir(out_dir)):
        while True:
            cont = raw_input("Output directory '" + out_dir +
                             "' does not exist. Create it? [y]/n? ")
            if (cont.lower() in ['', "y", "yes"]):
                os.makedirs(out_dir)
                break
            elif (cont.lower() in ["n", "no"]):
                print "Quitting"
                sys.exit()
            else:
                print "Unrecognized response. Please try again."
                pass

    # Check that the yaml file containing the project API URLs and keys exists
    if (not os.path.exists(api_key_path)):
        raise ValueError("Input API key file '" + api_key_path +
                         "' does not exist.")

    # Create OrderedDict of projects to backup
    with open(api_key_path, 'r') as handle:
        projects_to_back_up = yaml.load(handle, Loader=yaml.SafeLoader)

    ## If a specific set of project code names was given, check that their keys exist, and remove all other projects from the OrderedDict.
    if (not code_name_list is None):
        # Check that each code_name appears in the api_keys file.
        for code_name in code_name_list:
            if (not code_name in projects_to_back_up.keys()):
                raise ValueError("Requested project code name '" + code_name +
                                 "' not found in '" + api_key_path + "'")
        # Remove code names which were not requested.
        projects_to_back_up = OrderedDict([(code_name,
                                            projects_to_back_up[code_name])
                                           for code_name in code_name_list])

    # Create subdirectory for current date.
    if (not timestamp):
        date_string = datetime.datetime.today().strftime('%Y-%m-%d')
    else:
        date_string = datetime.datetime.today().strftime('%Y-%m-%d_%H-%M-%S')
    date_dir = os.path.join(out_dir, date_string)
    if (not os.path.isdir(date_dir)):
        os.mkdir(date_dir)

    # Save a message about the reason the backup is being performed in the date_dir.
    modification_notes_path = os.path.join(date_dir, 'README.txt')
    if (modification_notes is None):
        modification_notes = str(
            raw_input(
                "Please enter reason for performing backup. This entry will be written to '"
                + modification_notes_path +
                "' for future reference. (press RETURN to skip): "))
    if (modification_notes != ''):
        with open(modification_notes_path, 'wb') as handle:
            handle.write(modification_notes)

    # Loop over projects.
    for code_name, api_info in projects_to_back_up.iteritems():
        api_url = api_info['url']
        api_key = api_info['key']

        # If you do not have API rights to access the project, exportProjectInfo will return a dict with something like: {u'error': u'You do not have API rights because your privileges have expired for this project as of 2020-12-01.'} ; try to handle this case.
        project_info = exportProjectInfo(api_url, api_key)
        if 'error' in project_info:
            raise Exception('Error returned by REDCap: ' +
                            project_info['error'])
        else:
            pid = str(project_info['project_id'])
        # Backup project
        backupProject(api_url,
                      api_key,
                      date_dir,
                      date_string,
                      skip_files=skip_files)

    return
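
A sketch of the api_keys.yml layout that backupProjects expects (one url/key pair per project code name), followed by a hypothetical call:

# api_keys.yml (hypothetical entry):
#
#   my_project:
#       url: https://redcap.example.org/api/
#       key: 0123456789ABCDEF0123456789ABCDEF

backupProjects('/path/to/api_keys.yml',       # hypothetical paths
               '/backups/redcap',
               code_name_list=['my_project'],
               modification_notes='Routine weekly backup',
               timestamp=True)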
Example #5
def labelRecords(api_url,
                 api_key,
                 records_all,
                 records_requested,
                 all_requested,
                 project,
                 requested_format,
                 label_overwrite=False,
                 quiet=False):
    """This function takes records exported from exportRecords.py and replaces entries with the value
    'rr_hidden' if the (row number, field name) is hidden by branching logic, 'rr_invalid' if the 
    (row number, field name) is not supposed to be filled in for the row's (record ID, event, repeat 
    form, repeat instance) combination, and 'rr_error' if there is an error in the branching logic for
    the field. 

    If label_overwrite=True, fields containing values can be overwritten by 'rr_hidden' or 'rr_error'.
    """

    # Get the field name of the unique identifying field (e.g. "ipssid").
    if (not quiet):
        p_info = ProgressBar("(1/4) Getting project information")
        pass
    def_field = project.def_field

    #    records = records_all

    # Load high-level project information.
    project_info = exportProjectInfo(api_url, api_key)
    project_longitudinal = bool(project_info["is_longitudinal"])
    project_repeating = bool(
        project_info["has_repeating_instruments_or_events"])

    # Load list of events
    #events = getEvents(project, project_info, project_longitudinal)
    events = getEvents(api_url, api_key)

    # Load raw data dictionary.
    metadata_raw = project.export_metadata()

    # Load instrument-event mapping
    form_event_mapping = exportFormEventMapping(project, project_longitudinal)

    # Load information specifying which forms are repeating.
    repeating_forms_events = exportRepeatingFormsEvents(
        api_url, api_key, project_repeating)

    # Generate list of forms - list of dicts with two keys: 'instrument_label' and 'instrument_name'
    forms = exportFormsOrdered(api_url, api_key)
    forms_list = project.forms
    form_complete_names = []
    for form_name in forms_list:
        form_complete_name = form_name + '_complete'
        form_complete_names.append(form_complete_name)

    # Generate a dictionary with form_names as keys; each entry is a dict specifying in which
    # events the form is non-repeating, independently repeating, or dependently repeating.
    form_repetition_map = createFormRepetitionMap(project_longitudinal,
                                                  project_repeating,
                                                  form_event_mapping,
                                                  repeating_forms_events,
                                                  forms)

    # Gather data about each variable.
    metadata = parseMetadata(def_field, project_info, project_longitudinal,
                             project_repeating, events, metadata_raw,
                             form_event_mapping, repeating_forms_events, forms,
                             form_repetition_map)

    # Generate non-redundant list of record IDs.
    record_id_map = createRecordIDMap(def_field, records_all)

    # Build list of primary keys -- (ID, event, repeat form, repeat instance) tuples -- in the requested records.
    if (not all_requested):
        if (not project_longitudinal) and (not project_repeating):
            prim_key = (def_field, )
        elif (not project_longitudinal) and (project_repeating):
            prim_key = (def_field, 'redcap_repeat_instrument',
                        'redcap_repeat_instance')
        elif (project_longitudinal) and (not project_repeating):
            prim_key = (def_field, 'redcap_event_name')
        elif (project_longitudinal) and (project_repeating):
            prim_key = (def_field, 'redcap_event_name',
                        'redcap_repeat_instrument', 'redcap_repeat_instance')

        # Populate list of primary keys:
        prim_key_vals = []
        for row_index in range(len(records_requested)):
            row = records_requested[row_index]
            prim_key_val = tuple(row[key] for key in prim_key)
            prim_key_vals.append(prim_key_val)
        #        print prim_key_vals

    # Build list of fields in the requested records.
    fields_requested = tuple(
        field_name for field_name in records_requested[0].keys()
        if (field_name in metadata) or (field_name in form_complete_names))
    #    print fields_requested

    # Label records.
    if (not quiet):
        p_info.stop()
        p_check = ProgressBar(
            "(2/4) Checking whether fields are hidden and applicable to the current row"
        )
        milestone = max(len(records_all) / 2000, 1)
        pass
    list_invalid = []
    list_branching_logic_error = []
    list_hidden = []
    list_row_indices = []  # list of row indices in requested records.
    for row_index in range(len(records_all)):
        row = records_all[row_index]
        #        print row['ipssid'], row['redcap_event_name'], row['redcap_repeat_instrument'], row['redcap_repeat_instance']
        if (not all_requested):  # if certain rows are being excluded
            prim_key_val = tuple(row[key] for key in prim_key)
            #            print prim_key_val
            if (not prim_key_val
                    in prim_key_vals):  # if row is not in records_requested.
                continue
            else:
                list_row_indices.append(row_index)
#        for field_name, field in metadata.iteritems():
        for field_name in fields_requested:
            # The record ID field is always valid; do not perform checks on it.
            if (field_name == def_field):
                continue

            # DEBUG: Skip the form_complete fields for now.
            if (field_name in form_complete_names):
                continue

            field = metadata[field_name]

            # Check if (row index, field name) can possibly contain data.
            cell_valid = isEventFieldInstanceValid(project_longitudinal,
                                                   project_repeating,
                                                   form_repetition_map, field,
                                                   row)
            if (not cell_valid):
                #                records_all[row_index][field_name] = "r_invalid"
                list_invalid.append((row_index, field_name))

            # Check if the branching logic function is bugged for the current field; such cells are labelled "rr_blerror" later.
            if cell_valid:
                if (field.branching_logic_errors != []):
                    bl_valid = False
                    list_branching_logic_error.append((row_index, field_name))
                    warnings.warn(
                        'Malformed branching logic found for field: ' +
                        field_name)
                else:
                    bl_valid = True

            # Check if (row index, field name) is hidden by branching logic.
            try:
                if cell_valid and bl_valid:
                    if (field.branching_logic == None):
                        visible = True
                    elif field.branching_logic(row_index, form_repetition_map,
                                               records_all, record_id_map):
                        visible = True
                    else:
                        visible = False
                    if (not visible):
                        list_hidden.append((row_index, field_name))
            except Exception:
                print "Error evaluating branching logic for field:", field_name
        if (not quiet):
            if (row_index % milestone == 0):
                p_check.update(float(row_index + 1) / float(len(records_all)))
                pass
    if (not quiet):
        p_check.stop()
        p_label = ProgressBar(
            "(3/4) Applying labels to cells that are hidden or inavlid for the current row"
        )
        num_cells_to_label = len(list_hidden) + len(
            list_branching_logic_error) + len(list_invalid)
        num_labelled = 0
        milestone = max(num_cells_to_label / 2000, 1)
        pass

    # Markup records with types after finding all of them
    for cell in list_hidden:
        row_index, field_name = cell
        field_is_checkbox = (metadata[field_name].field_type == 'checkbox')
        if label_overwrite:
            records_all[row_index][field_name] = "rr_hidden"
        elif field_is_checkbox:  # THIS SECTION IS REDUNDANT.
            cb_blank = True
            for option in metadata[field_name].choices:
                if (records_all[row_index][option] == '1'
                    ):  # if an option is found to be selected.
                    cb_blank = False
            if cb_blank:
                records_all[row_index][field_name] = 'rr_hidden'
        else:
            if (records_all[row_index][field_name] == ''):
                records_all[row_index][field_name] = 'rr_hidden'
        if (not quiet):
            num_labelled += 1
            if (num_labelled % milestone == 0):
                p_label.update(float(num_labelled) / float(num_cells_to_label))
    for cell in list_branching_logic_error:
        row_index, field_name = cell
        field_is_checkbox = (metadata[field_name].field_type == 'checkbox')
        if label_overwrite:
            records_all[row_index][field_name] = "rr_blerror"
        elif field_is_checkbox:  # THIS SECTION IS REDUNDANT.
            cb_blank = True
            for option in metadata[field_name].choices:
                if (records_all[row_index][option] == '1'
                    ):  # if an option is found to be selected.
                    cb_blank = False
            if cb_blank:
                records_all[row_index][field_name] = 'rr_blerror'
        else:
            if (records_all[row_index][field_name] == ''):
                records_all[row_index][field_name] = 'rr_blerror'
        if (not quiet):
            num_labelled += 1
            if (num_labelled % milestone == 0):
                p_label.update(float(num_labelled) / float(num_cells_to_label))
    for cell in list_invalid:  # checkbox fields will be blank in this case.
        row_index, field_name = cell
        if (label_overwrite) or (records_all[row_index][field_name] == ''):
            records_all[row_index][field_name] = "rr_invalid"
        if (not quiet):
            num_labelled += 1
            if (num_labelled % milestone == 0):
                p_label.update(float(num_labelled) / float(num_cells_to_label))
                pass
    if (not quiet):
        p_label.stop()
        p_check = ProgressBar("(4/4) Formatting and checking for errors")
        pass

    # Select only the requested records from records_all
    if all_requested:
        records = records_all
    else:
        records = []
        for row_index in list_row_indices:
            records.append(
                {key: records_all[row_index][key]
                 for key in prim_key})
            records[-1]['redcap_data_access_group'] = records_all[row_index][
                'redcap_data_access_group']
            for field_name in fields_requested:
                records[-1][field_name] = records_all[row_index][field_name]

    # Convert to CSV if requested.
    if (requested_format == "csv"):
        records_df = pandas.DataFrame(records)
        ## Rearrange the columns to their standard CSV order.
        # Get column list from DataFrame.
        columns = records_df.columns.tolist()
        columns_ordered = []

        # To generate a list of all fields in order, including those that were not requested, build a list of all real data fields, and all form complete fields. Request all of these for the first row in CSV format, then split the CSV header at each comma to get the ordered list of headers.
        first_id = records_all[0][def_field]
        first_record = project.export_records(
            records=[first_id],
            fields=list(
                set([
                    field.split('___')[0]
                    for field in records_requested[0].keys() if (not field in [
                        'redcap_event_name', 'redcap_repeat_instrument',
                        'redcap_repeat_instance', 'redcap_data_access_group'
                    ])
                ])),
            export_data_access_groups=True,
            format='csv')
        columns_ordered = [
            col.strip() for col in first_record.split('\n')[0].split(',')
        ]

        # THE FOLLOWING REORDERING METHOD DOES NOT WORK IF FORM_COMPLETE FIELDS ARE INCLUDED.
        # Add the "standard" columns to the list.
        #for heading in [def_field, "redcap_event_name", "redcap_repeat_instrument", "redcap_repeat_instance", "redcap_data_access_group"]:
        #if (heading in columns) and (not heading in columns_ordered): # if heading was exported.
        #columns_ordered.append(heading)

        # Add the fields in the list they appear in metadata, excluding unexported fields.
        #for form_name in forms_list:
        #for field_name in metadata:
        #if (metadata[field_name].form_name == form_name) and (field_name in columns) and (not field_name in columns_ordered):
        #columns_ordered.append(field_name)

        # Add the form_complete field for the current form if it was requested.
        #form_complete_name = form_name + '_complete'
        #if (form_complete_name in columns) and (not form_complete_name in columns_ordered):
        #columns_ordered.append(form_complete_name)

        # Do sanity check that size of ordered headings is same as size of unordered headings.
        if (len(columns) != len(columns_ordered)):
            print "Number of ordered headings differs from size of unordered headings"
        if (len(set(columns_ordered)) != len(columns_ordered)):
            print "ERROR: Duplicate headings in ordered column headings."
            for heading in columns_ordered:
                if (columns_ordered.count(heading) > 1):
                    print "Duplicated heading:", heading
        for heading in list(set(columns) - set(columns_ordered)):
            print "Heading missing from ordered heading list:", heading
        for heading in list(set(columns_ordered) - set(columns)):
            print "Heading missing from unorderd heading list:", heading

        records_df = records_df[columns_ordered]  # Reorder the columns.
        records = records_df.to_csv(
            index=False, encoding='utf-8')  # Convert DataFrame to CSV string

    if (not quiet):
        p_check.stop()
        pass
    return records
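
A hedged sketch of calling labelRecords on a full export; exportRecords and redcap.Project are the same helpers used elsewhere in these examples, and the credentials are hypothetical:

api_url = "https://redcap.example.org/api/"   # hypothetical URL
api_key = "0123456789ABCDEF0123456789ABCDEF"  # hypothetical token
project = redcap.Project(api_url, api_key)
records_all = exportRecords(api_url, api_key, export_form_completion=True)
records_labelled = labelRecords(api_url, api_key,
                                records_all, records_all,
                                all_requested=True,
                                project=project,
                                requested_format='json')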
Example #6
def exportFiles(api_url, api_key, out_dir, flat=False):
    """Export all files stored in File Upload fields (this does not include files upload to the 'File Repository'.
Parameters:
    api_url: str, API URL of REDCap instance on which project is housed.
    api_key: str, user's API token
    out_dir: str, the directory in which the files
    flat: bool, if True, save all files in the same output directory, if False, create subdirectories to store files from distinct records, events, and instances."""

    ## Determine whether project is longitudinal, and whether it has repeating forms or events.
    project = redcap.Project(api_url, api_key)
    def_field = project.def_field
    project_info = exportProjectInfo(api_url, api_key)
    project_longitudinal = bool(project_info["is_longitudinal"])
    project_repeating = bool(
        project_info["has_repeating_instruments_or_events"])
    primary_key = [def_field]
    if project_longitudinal:
        primary_key.append('redcap_event_name')
    if project_repeating:
        primary_key.append('redcap_repeat_instrument')
        primary_key.append('redcap_repeat_instance')
    non_data_fields = primary_key + ['redcap_data_access_group']

    ## Identify all File Upload fields.
    file_upload_fields = []
    metadata_raw = project.export_metadata()  # list of dicts
    for row in metadata_raw:
        if (row['field_type'] == 'file'):
            file_upload_fields.append(row['field_name'])

    ## Determine which File Upload fields have an uploaded file. Export and save every file in the File Upload fields.
    if (len(file_upload_fields) == 0):
        return  # Quit if there are no File Upload fields.

    records = exportRecords(
        api_url, api_key, fields=file_upload_fields
    )  # should contain text like "[document]" to indicate where a file was uploaded

    for row in records:
        for field, value in row.iteritems():
            if (field in non_data_fields):
                continue
            if (value !=
                    ''):  # if field is a File Upload field containing a file.
                record = row[def_field]
                if project_longitudinal:
                    event = row['redcap_event_name']
                else:
                    event = ''
                if project_repeating:
                    instance = str(row['redcap_repeat_instance'])
                else:
                    instance = ''

                # Set the file name prefix and output directory using the record id, event, and instance.
                if flat:
                    prefix = 'record-' + record + '_'
                    if project_longitudinal:  # There is always an event name in longitudinal projects.
                        prefix += 'event-' + event + '_'
                    if (instance !=
                            ''):  # don't add blank instances to prefix.
                        prefix += 'instance-' + instance + '_'
                    prefix += 'field-' + field + '_'
                    prefix += 'filename-'
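                    # e.g. prefix = "record-1_event-acute_arm_1_instance-2_field-mri_report_filename-"
                    # (hypothetical record, event, instance, and field values)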
                    full_out_dir = out_dir  # save all files to the same directory
                else:
                    prefix = 'field-' + field + '_'
                    prefix += 'filename-'
                    full_out_dir = os.path.join(out_dir, 'record-' + record)
                    if project_longitudinal:  # There is always an event name in longitudinal projects.
                        full_out_dir = os.path.join(full_out_dir,
                                                    'event-' + event)
                    if (instance !=
                            ''):  # don't add blank instances to prefix.
                        full_out_dir = os.path.join(full_out_dir,
                                                    'instance-' + instance)
                    if (not os.path.isdir(full_out_dir)):
                        #print "Creating directory: "+full_out_dir
                        os.makedirs(full_out_dir)

                # Export the file.
                exportFile(full_out_dir,
                           api_url,
                           api_key,
                           record,
                           event,
                           instance,
                           field,
                           prefix=prefix)
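
A short usage sketch for exportFiles; the output path is hypothetical. With flat=False, files land in per-record (and per-event/per-instance) subdirectories; with flat=True, everything goes into out_dir with descriptive filename prefixes:

api_url = "https://redcap.example.org/api/"   # hypothetical URL
api_key = "0123456789ABCDEF0123456789ABCDEF"  # hypothetical token
exportFiles(api_url, api_key, '/backups/redcap/files', flat=False)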
Example #7
def getEvents(api_url, api_key, event_ids_path=None, quiet=False):
    """Return a dict of event information keyed by unique event name, or None if the project is not longitudinal."""

    # Get project information
    project = redcap.Project(api_url, api_key)
    project_info = exportProjectInfo(api_url, api_key)
    project_longitudinal = bool(project_info["is_longitudinal"])

    if project_longitudinal:
        # Read the mapping from event name to event IDs at the path indicated in settings.yml
        api_settings = ApiSettings()
        api_url, api_key, code_name = api_settings.getApiCredentials(
            api_url=api_url, api_key=api_key
        )  # code_name will be None if the entry does not exist in api_keys.yml
        # If a path to an event IDs map yaml file was specified explicitly, use it; otherwise use the path given in settings.yml.
        if (event_ids_path is None):
            event_ids_path = api_settings.settings['event_ids_path']
        if (not os.path.exists(event_ids_path)):
            if (not quiet):
                warnings.warn("Path to event ID map does not exist: '" +
                              event_ids_path + "'")
            event_ids_map = {}
        elif (code_name is None):
            event_ids_map = {}
        else:
            with open(event_ids_path, 'r') as handle:
                event_ids_map_all = yaml.load(handle, Loader=yaml.SafeLoader)

                # Convert the event_ids to strings.
                for ii, event_ids_map in event_ids_map_all.iteritems():
                    if (not event_ids_map is None):
                        event_ids_map_all[ii] = {
                            k: str(v)
                            for k, v in event_ids_map.iteritems()
                        }
            try:
                event_ids_map = event_ids_map_all[code_name]
            except KeyError:  # if project_code name does not have an entry in event_ids.yml
                if (not quiet):
                    warnings.warn("code_name '" + code_name +
                                  "' is not an entry in '" + event_ids_path +
                                  "'")
                event_ids_map = {}

        events = {}  # dict keyed by unique_event_name; values include the 'pretty' event name.

        pycap_events = project.events  # list of event dicts: unique_event_name, day_offset, custom_event_label, etc.
        for pycap_event in pycap_events:
            unique_name = pycap_event["unique_event_name"]
            events[unique_name] = {
                "day_offset": pycap_event["day_offset"],
                "custom_event_label": pycap_event["custom_event_label"],
                "event_name": pycap_event["event_name"],
                "arm_num": pycap_event["arm_num"],
                "offset_min": pycap_event["offset_min"],
                "offset_max": pycap_event["offset_max"],
            }

            # Look up the event ID in the map read from event_ids.yml.
            # NOT SURE HOW TO GET THIS INFORMATION AUTOMATICALLY.
            if (event_ids_map != {}):
                try:
                    event_id = event_ids_map[unique_name]
                except KeyError:
                    if (not quiet):
                        warnings.warn("Event named '" + unique_name +
                                      "' not found in '" + event_ids_path + "'")
                    event_id = None
            else:
                event_id = None
            events[unique_name]["event_id"] = event_id
    else:
        events = None
    return events
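
A usage sketch for getEvents; the returned dict is keyed by unique event name (None is returned for non-longitudinal projects), and the credentials are hypothetical:

api_url = "https://redcap.example.org/api/"   # hypothetical URL
api_key = "0123456789ABCDEF0123456789ABCDEF"  # hypothetical token
events = getEvents(api_url, api_key)
if (events is not None):
    for unique_name, info in events.iteritems():
        print unique_name, info["event_name"], info["event_id"]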