def test_recalculate_percentile(self):
    # Seed the adapter's lookup tables directly with the test fixtures.
    adapter = recalc_util.CachedMCDIAdapter()
    adapter.max_word_counts['standard'] = 681
    adapter.mcdi_models['standard'] = TEST_MCDI_MODEL
    adapter.percentiles['typical-male'] = TEST_PERCENTILES_MODEL

    test_snapshot = copy.deepcopy(TEST_SNAPSHOT)

    self.mox.StubOutWithMock(db_util, 'load_mcdi_model')
    self.mox.StubOutWithMock(db_util, 'load_snapshot_contents')

    # 31 entries with value 1, 22 with value 2 and 13 with value 3. The
    # expected count of 53 below equals 31 + 22, so values 1 and 2 are
    # presumably the "spoken" values in TEST_MCDI_MODEL (fixture not shown
    # here).
    value_1_entry = models.SnapshotContent(0, '', 1, 0)
    value_2_entry = models.SnapshotContent(0, '', 2, 0)
    value_3_entry = models.SnapshotContent(0, '', 3, 0)
    snapshot_words = (
        [value_1_entry] * 31
        + [value_2_entry] * 22
        + [value_3_entry] * 13
    )

    db_util.load_snapshot_contents(test_snapshot).AndReturn(snapshot_words)
    self.mox.ReplayAll()

    recalc_util.recalculate_age(test_snapshot)
    recalc_util.recalculate_percentile(test_snapshot, adapter)

    self.assertTrue(abs(test_snapshot.age - 17.71) < 0.01)
    self.assertEqual(test_snapshot.words_spoken, 53)
    self.assertEqual(test_snapshot.percentile, 14)
def summarize_snapshots(snapshot_metas):
    """Map each word to the earliest session date at which it was spoken.

    For every snapshot, the values that count as "spoken" are read from the
    'count_as_spoken' detail of the snapshot's MCDI model. Each MCDI model
    is loaded through db_util at most once per MCDI type.

    @param snapshot_metas: Metadata of the snapshots to summarize.
    @type snapshot_metas: Iterable of objects exposing mcdi_type and
        session_date (presumably models.SnapshotMetadata).
    @return: Mapping from word to the smallest session_date among the
        snapshots in which the word had a "spoken" value, or None if the
        word was seen but never with a "spoken" value.
    @rtype: dict
    """
    spoken_values_by_mcdi = {}
    first_spoken = {}

    for meta in snapshot_metas:
        mcdi_name = meta.mcdi_type
        session_date = meta.session_date

        # Get the values that count as "spoken", loading each model once.
        if mcdi_name not in spoken_values_by_mcdi:
            mcdi_info = db_util.load_mcdi_model(mcdi_name)
            spoken_values_by_mcdi[mcdi_name] = (
                mcdi_info.details['count_as_spoken']
            )
        spoken_values = spoken_values_by_mcdi[mcdi_name]

        # Parse the words
        for word_info in db_util.load_snapshot_contents(meta):
            word = word_info.word
            if word_info.value in spoken_values:
                # Record the date if the word is new, was so far only
                # reported as not spoken (None), or this snapshot is earlier.
                known_date = first_spoken.get(word)
                if known_date is None or known_date > session_date:
                    first_spoken[word] = session_date
            elif word not in first_spoken:
                # Report not known if not already reported
                first_spoken[word] = None

    return first_spoken
def generate_study_report_rows(snapshots_from_study, presentation_format):
    """Serialize a set of snapshots into transposed report rows.

    A header column is built from the fixed snapshot field names followed by
    every word found in any snapshot (UTF-8 encoded and sorted). Each
    snapshot is serialized into a column of the same layout through
    serialize_snapshot, and the columns are then transposed.

    @param snapshots_from_study: The snapshots to serialize.
    @type snapshots_from_study: Iterable of models.SnapshotMetadata
    @param presentation_format: The presentation format to use to render the
        string serialization.
    @type presentation_format: models.PresentationFormat
    @return: One entry per field, each holding the header label followed by
        that field's value for every snapshot.
    @rtype: Result of zip (a list of tuples on Python 2).
    """
    # The snapshots are walked twice (word discovery, then serialization),
    # so materialize them first; a one-shot iterator would otherwise be
    # exhausted before any snapshot is serialized.
    snapshots = list(snapshots_from_study)

    word_listing_set = set()
    for snapshot in snapshots:
        snapshot_contents = db_util.load_snapshot_contents(snapshot)
        word_listing_set.update(
            entry.word.encode('utf-8', 'ignore')
            for entry in snapshot_contents
        )
    word_listing = sorted(word_listing_set)

    serialized_snapshots = [
        serialize_snapshot(snapshot, presentation_format, word_listing)
        for snapshot in snapshots
    ]

    header_col = [
        'database id',
        'child id',
        'study id',
        'study',
        'gender',
        'age',
        'birthday',
        'session date',
        'session num',
        'total num sessions',
        'words spoken',
        'items excluded',
        'percentile',
        'extra categories',
        'revision',
        'languages',
        'num languages',
        'mcdi type',
        'hard of hearing',
        'deleted'
    ]
    header_col.extend(word_listing)

    cols = [header_col]
    cols.extend(serialized_snapshots)

    # Transpose: columns (header + one per snapshot) become per-field rows.
    return zip(*cols)
def recalculate_percentile(snapshot, cached_adapter):
    """Refresh the words_spoken and percentile fields of a snapshot in place.

    @param snapshot: The snapshot whose derived statistics are updated.
    @type snapshot: SnapshotMetadata
    @param cached_adapter: Passed through to get_words_spoken and
        recalculate_percentile_raw.
    """
    mcdi_type = snapshot.mcdi_type
    gender = snapshot.gender
    contents = db_util.load_snapshot_contents(snapshot)

    # words_spoken is stored first because the percentile calculation reads
    # it back from the snapshot.
    spoken_count = get_words_spoken(cached_adapter, mcdi_type, contents)
    snapshot.words_spoken = spoken_count

    new_percentile = recalculate_percentile_raw(
        cached_adapter,
        mcdi_type,
        gender,
        snapshot.words_spoken,
        snapshot.age
    )
    snapshot.percentile = new_percentile
def recalculate_percentile(snapshot, cached_adapter):
    """Recompute the words_spoken and percentile fields of a snapshot in place.

    The MCDI model for the snapshot's mcdi_type supplies both the values
    that count as "spoken" and the names of the percentile tables. If the
    adapter returns no model for that type, the 'fullenglishmcdi' model is
    used instead.

    @param snapshot: The snapshot to update; mcdi_type, gender and age are
        read, words_spoken and percentile are written.
    @type snapshot: SnapshotMetadata
    @param cached_adapter: Source of MCDI models, percentile models and
        maximum word counts.
    """
    mcdi_model = cached_adapter.load_mcdi_model(snapshot.mcdi_type)
    if mcdi_model is None:
        mcdi_model = cached_adapter.load_mcdi_model('fullenglishmcdi')

    # The male table is used for both MALE and OTHER_GENDER; every other
    # gender value uses the female table.
    meta_percentile_info = mcdi_model.details['percentiles']
    gender = snapshot.gender
    if gender == constants.MALE or gender == constants.OTHER_GENDER:
        percentiles_name = meta_percentile_info['male']
    else:
        percentiles_name = meta_percentile_info['female']
    percentiles = cached_adapter.load_percentile_model(percentiles_name)

    count_as_spoken_vals = mcdi_model.details['count_as_spoken']
    individual_words = db_util.load_snapshot_contents(snapshot)
    snapshot.words_spoken = sum(
        1 for word in individual_words
        if word.value in count_as_spoken_vals
    )

    # NOTE(review): the maximum word count is looked up with the snapshot's
    # own mcdi_type even when the fallback model was used above -- confirm
    # that the adapter handles unknown types here.
    snapshot.percentile = math_util.find_percentile(
        percentiles.details,
        snapshot.words_spoken,
        snapshot.age,
        cached_adapter.get_max_mcdi_words(snapshot.mcdi_type)
    )
def test_summarize_snapshots(self):
    # Three snapshots with increasing session dates: the first two share an
    # MCDI type, the third uses a different one.
    snapshot_specs = (
        ('mcdi_type_1', '2015/01/01'),
        ('mcdi_type_1', '2015/02/01'),
        ('mcdi_type_2', '2015/03/01'),
    )
    test_metadata = []
    for mcdi_type, session_date in snapshot_specs:
        snapshot = TEST_SNAPSHOT.clone()
        snapshot.mcdi_type = mcdi_type
        snapshot.session_date = session_date
        test_metadata.append(snapshot)

    def build_contents(word_values):
        return [
            models.SnapshotContent(0, word, value, 1)
            for word, value in word_values
        ]

    first_contents = build_contents(
        [('word1', 1), ('word2', 0), ('word3', 0)]
    )
    second_contents = build_contents(
        [('word1', 1), ('word2', 2), ('word3', 0)]
    )
    third_contents = build_contents(
        [('word1', 1), ('word2', 1), ('word3', 1), ('word4', 2)]
    )

    self.mox.StubOutWithMock(db_util, 'load_mcdi_model')
    self.mox.StubOutWithMock(db_util, 'load_snapshot_contents')

    # Expected calls, recorded in the order summarize_snapshots makes them:
    # the model for a type is loaded only the first time that type is seen.
    db_util.load_mcdi_model('mcdi_type_1').AndReturn(
        models.MCDIFormat('', '', '', {'count_as_spoken': [1, 2]})
    )
    db_util.load_snapshot_contents(test_metadata[0]).AndReturn(
        first_contents
    )
    db_util.load_snapshot_contents(test_metadata[1]).AndReturn(
        second_contents
    )
    db_util.load_mcdi_model('mcdi_type_2').AndReturn(
        models.MCDIFormat('', '', '', {'count_as_spoken': [1]})
    )
    db_util.load_snapshot_contents(test_metadata[2]).AndReturn(
        third_contents
    )
    self.mox.ReplayAll()

    serialization = report_util.summarize_snapshots(test_metadata)

    # word4 only ever has value 2, which the second MCDI type does not count
    # as spoken, so it is reported as None.
    expected_first_dates = (
        ('word1', '2015/01/01'),
        ('word2', '2015/02/01'),
        ('word3', '2015/03/01'),
        ('word4', None),
    )
    for word, first_date in expected_first_dates:
        self.assertEqual(serialization[word], first_date)
def serialize_snapshot(snapshot, presentation_format=None, word_listing=None,
                       report_dict=False, include_words=True):
    """Turn a snapshot into a list (or dict) of its field values.

    @param snapshot: The snapshot to serialize.
    @type snapshot: models.SnapshotMetadata
    @param presentation_format: The presentation format to use to render the
        gender, extra categories and word values.
    @type presentation_format: models.PresentationFormat
    @param word_listing: Words to report, in output order. Words are matched
        against the snapshot contents ignoring case and '*' characters;
        words missing from the snapshot are reported through a
        NotFoundSnapshotContent placeholder. Defaults to no words.
    @type word_listing: List of str
    @param report_dict: If True, return a dict keyed by field name (with the
        word values under 'words') instead of a flat list.
    @type report_dict: bool
    @param include_words: If False, the snapshot contents are not loaded and
        no word values are reported.
    @type include_words: bool
    @return: Serialized version of the snapshot. In list form the fixed
        fields come first, followed by the word values, and str values are
        passed through encode('utf-8', 'ignore'). In dict form values are
        returned unencoded.
    @rtype: List, or dict if report_dict is True
    """
    if not word_listing:
        word_listing = []

    if include_words:
        def normalize(word):
            # Matching ignores case and '*' markers in the word.
            return word.lower().replace('*', '')

        contents_by_word = {}
        for entry in db_util.load_snapshot_contents(snapshot):
            contents_by_word[normalize(entry.word)] = entry

        not_found_entry = NotFoundSnapshotContent()
        word_values = [
            interpret_word_value(
                contents_by_word.get(normalize(word), not_found_entry).value,
                presentation_format
            )
            for word in word_listing
        ]

    gender = interpret_word_value(snapshot.gender, presentation_format)
    extra_categories = interpret_word_value(
        snapshot.extra_categories,
        presentation_format
    )

    # Single definition of the fixed fields, so the dict and list forms
    # cannot drift apart. Order here is the column order of the list form.
    fields = [
        ('database_id', snapshot.database_id),
        ('child_id', snapshot.child_id),
        ('study_id', snapshot.study_id),
        ('study', snapshot.study),
        ('gender', gender),
        ('age', snapshot.age),
        ('birthday', snapshot.birthday),
        ('session_date', snapshot.session_date),
        ('session_num', snapshot.session_num),
        ('total_num_sessions', snapshot.total_num_sessions),
        ('words_spoken', snapshot.words_spoken),
        ('items_excluded', snapshot.items_excluded),
        ('percentile', snapshot.percentile),
        ('extra_categories', extra_categories),
        ('revision', snapshot.revision),
        ('languages', snapshot.languages),
        ('num_languages', snapshot.num_languages),
        ('mcdi_type', snapshot.mcdi_type),
        ('hard_of_hearing', snapshot.hard_of_hearing),
        ('deleted', snapshot.deleted)
    ]

    if report_dict:
        return_dict = dict(fields)
        if include_words:
            return_dict['words'] = word_values
        return return_dict

    return_list = [value for _, value in fields]
    if include_words:
        return_list.extend(word_values)

    # NOTE(review): on Python 2, `str` is the byte-string type, so unicode
    # values pass through this step unencoded -- confirm that is intended.
    return [
        value.encode('utf-8', 'ignore') if isinstance(value, str) else value
        for value in return_list
    ]