def failed_send_handler(
        participant: Participant, fcm_token: str, error_message: str, schedules: List[ScheduledEvent]
):
    """ Contains the logic for unregistering a participant's push notifications.
    Participants get re-enabled when they next hit the app check-in endpoint. """
    if participant.push_notification_unreachable_count >= PUSH_NOTIFICATION_ATTEMPT_COUNT:
        now = timezone.now()
        fcm_hist = ParticipantFCMHistory.objects.get(token=fcm_token)
        fcm_hist.unregistered = now
        fcm_hist.save()

        PushNotificationDisabledEvent(
            participant=participant, timestamp=now,
            count=participant.push_notification_unreachable_count
        ).save()

        # disable the credential
        participant.push_notification_unreachable_count = 0
        participant.save()

        print(f"Participant {participant.patient_id} has had push notifications "
              f"disabled after {PUSH_NOTIFICATION_ATTEMPT_COUNT} failed attempts to send.")
    else:
        now = None
        participant.push_notification_unreachable_count += 1
        participant.save()
        print(f"Participant {participant.patient_id} has had push notification failures "
              f"incremented to {participant.push_notification_unreachable_count}.")

    create_archived_events(schedules, success=False, created_on=now, status=error_message)
    enqueue_weekly_surveys(participant, schedules)
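# Illustrative sketch only, not part of the handler above: the unreachable-count lifecycle that
# failed_send_handler and success_send_handler implement, reduced to plain Python. The attempt
# limit below is a placeholder standing in for PUSH_NOTIFICATION_ATTEMPT_COUNT.
EXAMPLE_ATTEMPT_LIMIT = 6

def next_unreachable_count(current_count: int, send_succeeded: bool) -> int:
    """ Returns the counter value stored on the participant after one send attempt. """
    if send_succeeded:
        return 0  # a successful send resets the counter
    if current_count >= EXAMPLE_ATTEMPT_LIMIT:
        return 0  # the token is unregistered and the counter reset
    return current_count + 1  # otherwise just increment and try again on the next send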
def migrate_users():
    m_user_list = MUserSet.iterator()
    d_user_list = []

    for m_user in m_user_list:
        with error_handler:
            # Get information about the Participant's Study
            m_study_id = m_user['study_id']
            try:
                d_study_info = study_id_dict[m_study_id]
            except KeyError:
                print('Study {} is referenced by a User but does not exist.'.format(m_study_id))
                continue

            # Django convention is to use the empty string rather than None in CharFields
            device_id = m_user['device_id'] or ''
            os_type = m_user['os_type'] or ''

            # Build a new Django Participant
            d_user = DUser(
                patient_id=m_user['_id'],
                device_id=device_id,
                os_type=os_type,
                study_id=d_study_info['pk'],
                password=m_user['password'],
                salt=m_user['salt'],
                deleted=d_study_info['deleted'],
            )

            # Validate the Participant and add it to the bulk_create list
            d_user.full_clean()
            d_user_list.append(d_user)

    # Bulk_create the Participants
    DUser.objects.bulk_create(d_user_list)

    for m_user in MUserSet.iterator():
        with error_handler:
            m_user_id = m_user['_id']
            try:
                d_user_id = DUser.objects.filter(patient_id=m_user['_id']).values('pk').get()
            except DUser.DoesNotExist:
                msg = 'User {} was not created.'.format(m_user_id)
                print(msg)
                # raise ObjectCreationException(msg)
                continue

            user_id_dict[m_user_id] = d_user_id
def success_send_handler(participant: Participant, fcm_token: str, schedules: List[ScheduledEvent]):
    # If the query was successful archive the schedules. Clear the fcm unregistered flag
    # if it was set (this shouldn't happen. ever. but in case we hook in a ui element we need it.)
    print(f"Push notification send succeeded for {participant.patient_id}.")

    # this condition shouldn't occur. Leave in, this case would be miserable to diagnose.
    fcm_hist = ParticipantFCMHistory.objects.get(token=fcm_token)
    if fcm_hist.unregistered is not None:
        fcm_hist.unregistered = None
        fcm_hist.save()

    participant.push_notification_unreachable_count = 0
    participant.save()

    create_archived_events(schedules, success=True, status=ArchivedEvent.SUCCESS)
    enqueue_weekly_surveys(participant, schedules)
def create_new_participant():
    """ Creates a new user, generates a password and keys, pushes data to s3 and the user
    database, adds the user to the study they are supposed to be attached to, and returns a
    string containing the password and patient id. """
    study_id = request.values['study_id']
    patient_id, password = Participant.create_with_password(study_id=study_id)
    participant = Participant.objects.get(patient_id=patient_id)
    study = Study.objects.get(id=study_id)
    add_fields_and_interventions(participant, study)

    # Create an empty file on S3 indicating that this user exists
    study_object_id = Study.objects.filter(pk=study_id).values_list('object_id', flat=True).get()
    s3_upload(patient_id, b"", study_object_id)
    create_client_key_pair(patient_id, study_object_id)
    repopulate_all_survey_scheduled_events(study, participant)

    response_string = 'Created a new patient\npatient_id: {:s}\npassword: {:s}'.format(
        patient_id, password
    )
    flash(response_string, 'success')
    return redirect('/view_study/{:s}'.format(study_id))
def test_participant_mongo_integrity(self):
    study = Study(**self.translated_reference_study)
    study.save()

    reference_participant = self.translated_reference_participant
    django_participant = Participant(
        study=study, **reference_participant
    ).as_unpacked_native_python()

    x = compare_dictionaries(reference_participant, django_participant, ignore=['id', 'deleted'])
    self.assertTrue(x)
def participant_credential_generator(study_id, number_of_new_patients, desired_filename):
    si = StreamingBytesIO()
    filewriter = writer(si)
    filewriter.writerow(['Patient ID', "Registration password"])
    study_object_id = Study.objects.filter(pk=study_id).values_list('object_id', flat=True).get()
    study_name = Study.objects.filter(pk=study_id).values_list('name', flat=True).get()

    for _ in range(number_of_new_patients):
        patient_id, password = Participant.create_with_password(study_id=study_id)
        # Creates an empty file on s3 indicating that this user exists
        s3_upload(
            construct_s3_raw_data_path(study_object_id, patient_id), "", study_object_id, raw_path=True
        )
        filewriter.writerow([patient_id, password])
        yield si.getvalue()
        si.empty()
def csv_generator(study_id, number_of_new_patients):
    si = StreamingStringsIO()
    filewriter = writer(si)
    filewriter.writerow(['Patient ID', "Registration password"])
    study_object_id = Study.objects.filter(pk=study_id).values_list('object_id', flat=True).get()

    for _ in range(number_of_new_patients):
        patient_id, password = Participant.create_with_password(study_id=study_id)
        # Creates an empty file on s3 indicating that this user exists
        s3_upload(patient_id, "", study_object_id)
        create_client_key_pair(patient_id, study_object_id)
        filewriter.writerow([patient_id, password])
        yield si.getvalue()
        si.empty()
def create_new_patient():
    """ Creates a new user, generates a password and keys, pushes data to s3 and the user
    database, adds the user to the study they are supposed to be attached to, and returns a
    string containing the password and patient id. """
    study_id = request.values['study_id']
    patient_id, password = Participant.create_with_password(study_id=study_id)

    # Create an empty file on S3 indicating that this user exists
    study_object_id = Study.objects.filter(pk=study_id).values_list('object_id', flat=True).get()
    s3_upload(
        construct_s3_raw_data_path(study_object_id, patient_id), "", study_object_id, raw_path=True
    )

    response_string = 'Created a new patient\npatient_id: {:s}\npassword: {:s}'.format(
        patient_id, password
    )
    flash(response_string, 'success')
    return redirect('/view_study/{:s}'.format(study_id))
def participant_csv_generator(study_id, number_of_new_patients):
    study = Study.objects.get(pk=study_id)
    si = StreamingStringsIO()
    filewriter = writer(si)
    filewriter.writerow(['Patient ID', "Registration password"])

    for _ in range(number_of_new_patients):
        patient_id, password = Participant.create_with_password(study_id=study_id)
        participant = Participant.objects.get(patient_id=patient_id)
        add_fields_and_interventions(participant, Study.objects.get(id=study_id))

        # Creates an empty file on s3 indicating that this user exists
        s3_upload(patient_id, b"", study.object_id)
        create_client_key_pair(patient_id, study.object_id)
        repopulate_all_survey_scheduled_events(study, participant)

        filewriter.writerow([patient_id, password])
        yield si.getvalue()
        si.empty()
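# Hedged usage sketch: how a generator like participant_csv_generator could be streamed to the
# browser as a CSV download. The view name and filename here are assumptions, not taken from
# this codebase; the point is that rows are sent as they are generated, so a large batch of new
# participants never has to be held in memory at once.
from flask import Response

def download_participant_csv(study_id, number_of_new_patients):
    return Response(
        participant_csv_generator(study_id, number_of_new_patients),
        mimetype="text/csv",
        headers={"Content-Disposition": "attachment; filename=new_participants.csv"},
    )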
def do_process_user_file_chunks(
        count: int, error_handler: ErrorHandler, skip_count: int, participant: Participant
):
    """
    Run through the files to process, pull their data, and put it into s3 bins. Run each file
    through the appropriate logic path based on file type.

    If a file is empty, put its ftp object into the empty_files_list; we can't delete objects
    in-place while iterating over the db.

    All files except the audio recording files are CSVs. Most of them can be separated by
    "time bin" (one-hour chunks), then concatenated and sorted trivially. A few files - the call
    log, the identifier file, and the wifi log - require some triage beforehand. The debug log
    cannot be correctly sorted by time for all elements, because it was not actually expected to
    be used by researchers, but it is apparently quite useful.

    Any errors are themselves concatenated using the passed-in error handler.

    In a single call to this function, `count` files will be processed, starting from file
    number `skip_count`. The first `skip_count` files are expected to be files that have
    previously errored in file processing.
    """
    # Declare a defaultdict containing a tuple of two double ended queues (deque, pronounced "deck")
    all_binified_data = defaultdict(lambda: (deque(), deque()))
    ftps_to_remove = set()
    # The ThreadPool enables downloading multiple files simultaneously from the network, and
    # continuing to download files as other files are being processed, making the code as a
    # whole run faster.
    pool = ThreadPool(CONCURRENT_NETWORK_OPS)
    survey_id_dict = {}

    # A Django query with a slice (e.g. .all()[x:y]) makes a LIMIT query, so it only gets from
    # the database those FTPs that are in the slice.
    print(participant.as_native_python())
    print(len(participant.files_to_process.exclude(deleted=True).all()))
    print(count)
    print(skip_count)

    files_to_process = participant.files_to_process.exclude(deleted=True).all()

    for data in pool.map(batch_retrieve_for_processing,
                         files_to_process[skip_count:count + skip_count],
                         chunksize=1):
        with error_handler:
            # If we encountered any errors in retrieving the files for processing, they have
            # been lumped together into data['exception']. Raise them here to the error handler
            # and move to the next file.
            if data['exception']:
                print("\n" + data['ftp']['s3_file_path'])
                print(data['traceback'])
                ################################################################
                # YOU ARE SEEING THIS EXCEPTION WITHOUT A STACK TRACE
                # BECAUSE IT OCCURRED INSIDE POOL.MAP ON ANOTHER THREAD
                ################################################################
                raise data['exception']

            if data['chunkable']:
                newly_binified_data, survey_id_hash = process_csv_data(data)
                if data['data_type'] in SURVEY_DATA_FILES:
                    survey_id_dict[survey_id_hash] = resolve_survey_id_from_file_name(
                        data['ftp']["s3_file_path"]
                    )

                if newly_binified_data:
                    append_binified_csvs(all_binified_data, newly_binified_data, data['ftp'])
                else:  # delete empty files from FilesToProcess
                    ftps_to_remove.add(data['ftp']['id'])
                continue

            else:  # if not data['chunkable']
                timestamp = clean_java_timecode(
                    data['ftp']["s3_file_path"].rsplit("/", 1)[-1][:-4]
                )
                # Since we aren't binning the data by hour, just create a ChunkRegistry that
                # points to the already existing S3 file.
                ChunkRegistry.register_unchunked_data(
                    data['data_type'],
                    timestamp,
                    data['ftp']['s3_file_path'],
                    data['ftp']['study'].pk,
                    data['ftp']['participant'].pk,
                    data['file_contents'],
                )
                ftps_to_remove.add(data['ftp']['id'])

    pool.close()
    pool.terminate()

    more_ftps_to_remove, number_bad_files = upload_binified_data(
        all_binified_data, error_handler, survey_id_dict
    )
    ftps_to_remove.update(more_ftps_to_remove)

    # Actually delete the processed FTPs from the database
    FileToProcess.objects.filter(pk__in=ftps_to_remove).delete()

    # Garbage collect to free up memory
    gc.collect()
    return number_bad_files
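# Illustrative sketch only, not the project's binify implementation: the "time bin" idea from
# the docstring above is just integer division of a unix timestamp into one-hour buckets, so
# rows from many uploads that fall within the same hour can be concatenated and sorted together.
ONE_HOUR_IN_SECONDS = 60 * 60

def hour_bin_for_timestamp(unix_timestamp_seconds: int) -> int:
    """ Returns an integer bin number; all timestamps within the same hour share a bin. """
    return unix_timestamp_seconds // ONE_HOUR_IN_SECONDS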
def decrypt_device_file(original_data: bytes, participant: Participant) -> bytes:
    """ Runs the line-by-line decryption of a file encrypted by a device. """

    def create_line_error_db_entry(error_type):
        # declaring this inside decrypt_device_file to access its function-global variables
        if IS_STAGING:
            LineEncryptionError.objects.create(
                type=error_type,
                base64_decryption_key=encode_base64(aes_decryption_key),
                line=encode_base64(line),
                prev_line=encode_base64(file_data[i - 1] if i > 0 else ''),
                next_line=encode_base64(file_data[i + 1] if i < len(file_data) - 1 else ''),
                participant=participant,
            )

    bad_lines = []
    error_types = []
    error_count = 0
    good_lines = []

    # Don't refactor to pop the decryption key line out of the file_data list; this list can be
    # thousands of lines. Also, this line is a 2x memcopy with N new bytes objects.
    file_data = [line for line in original_data.split(b'\n') if line != b""]

    if not file_data:
        raise HandledError("The file had no data in it. Return 200 to delete file from device.")

    private_key_cipher = participant.get_private_key()
    aes_decryption_key = extract_aes_key(file_data, participant, private_key_cipher, original_data)

    for i, line in enumerate(file_data):
        # we need to skip the first line (the decryption key), but need real index values in i
        if i == 0:
            continue

        if line is None:
            # this case causes weird behavior inside decrypt_device_line, so we test for it instead.
            error_count += 1
            create_line_error_db_entry(LineEncryptionError.LINE_IS_NONE)
            error_types.append(LineEncryptionError.LINE_IS_NONE)
            bad_lines.append(line)
            print("encountered empty line of data, ignoring.")
            continue

        try:
            good_lines.append(decrypt_device_line(participant.patient_id, aes_decryption_key, line))
        except Exception as error_orig:
            error_string = str(error_orig)
            error_count += 1
            error_message = "There was an error in user decryption: "

            if isinstance(error_orig, (Base64LengthException, PaddingException)):
                # this case used to also catch IndexError, this probably changed after the python3 upgrade
                error_message += "Something is wrong with data padding:\n\tline: %s" % line
                create_line_error_db_entry(LineEncryptionError.PADDING_ERROR)
                error_types.append(LineEncryptionError.PADDING_ERROR)
                bad_lines.append(line)
                continue

            # case not reachable, decryption key has validation logic.
            # if isinstance(error_orig, TypeError) and aes_decryption_key is None:
            #     error_message += "The key was empty:\n\tline: %s" % line
            #     create_line_error_db_entry(LineEncryptionError.EMPTY_KEY)
            #     error_types.append(LineEncryptionError.EMPTY_KEY)
            #     bad_lines.append(line)
            #     continue

            # untested, error should be caught as a decryption key error
            # if isinstance(error_orig, ValueError) and "Key cannot be the null string" in error_string:
            #     error_message += "The key was the null string:\n\tline: %s" % line
            #     create_line_error_db_entry(LineEncryptionError.EMPTY_KEY)
            #     error_types.append(LineEncryptionError.EMPTY_KEY)
            #     bad_lines.append(line)
            #     continue

            ################### skip these errors ##############################
            if "unpack" in error_string:
                error_message += "malformed line of config, dropping it and continuing."
                create_line_error_db_entry(LineEncryptionError.MALFORMED_CONFIG)
                error_types.append(LineEncryptionError.MALFORMED_CONFIG)
                bad_lines.append(line)
                # The config is not colon separated correctly; this is a single line error, we
                # can just drop it. It implies an interrupted write (or read) operation.
                continue

            if "Input strings must be a multiple of 16 in length" in error_string:
                error_message += "Line was of incorrect length, dropping it and continuing."
                create_line_error_db_entry(LineEncryptionError.INVALID_LENGTH)
                error_types.append(LineEncryptionError.INVALID_LENGTH)
                bad_lines.append(line)
                continue

            if isinstance(error_orig, InvalidData):
                error_message += "Line contained no data, skipping: " + str(line)
                create_line_error_db_entry(LineEncryptionError.LINE_EMPTY)
                error_types.append(LineEncryptionError.LINE_EMPTY)
                bad_lines.append(line)
                continue

            if isinstance(error_orig, InvalidIV):
                error_message += "Line contained no iv, skipping: " + str(line)
                create_line_error_db_entry(LineEncryptionError.IV_MISSING)
                error_types.append(LineEncryptionError.IV_MISSING)
                bad_lines.append(line)
                continue

            elif 'IV must be' in error_string:
                # shifted this to an okay-to-proceed line error March 2021.
                error_message += "iv has bad length."
                create_line_error_db_entry(LineEncryptionError.IV_BAD_LENGTH)
                error_types.append(LineEncryptionError.IV_BAD_LENGTH)
                bad_lines.append(line)
                continue

            # Give up on these errors:
            # should be handled in decryption key validation.
            # if 'AES key' in error_string:
            #     error_message += "AES key has bad length."
            #     create_line_error_db_entry(LineEncryptionError.AES_KEY_BAD_LENGTH)
            #     error_types.append(LineEncryptionError.AES_KEY_BAD_LENGTH)
            #     bad_lines.append(line)
            #     raise HandledError(error_message)

            elif 'Incorrect padding' in error_string:
                error_message += "base64 padding error, config is truncated."
                create_line_error_db_entry(LineEncryptionError.MP4_PADDING)
                error_types.append(LineEncryptionError.MP4_PADDING)
                bad_lines.append(line)
                # This is only seen in mp4 files. Possibilities:
                #   upload during a write operation,
                #   broken base64 conversion in the app,
                #   some unanticipated error in the file upload.
                raise HandledError(error_message)

            else:
                # If none of the above errors happened, raise the error raw
                raise

    if error_count:
        EncryptionErrorMetadata.objects.create(
            file_name=request.values['file_name'],
            total_lines=len(file_data),
            number_errors=error_count,
            error_lines=json.dumps([str(line) for line in bad_lines]),
            error_types=json.dumps(error_types),
            participant=participant,
        )

    # join should be rather well optimized and not cause O(n^2) total memory copies
    return b"\n".join(good_lines)
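# Hedged usage sketch (the endpoint wiring here is an assumption, not this codebase's code):
# HandledError means "this file is unusable, but the device should still delete it", so a
# caller returns success to the device rather than asking it to retry the upload forever.
def handle_device_upload(participant: Participant, uploaded_bytes: bytes):
    try:
        decrypted = decrypt_device_file(uploaded_bytes, participant)
    except HandledError:
        return "", 200  # acknowledge the upload so the device deletes the bad file
    store_decrypted_file(decrypted)  # hypothetical persistence step
    return "", 200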