def __setup_test_cdi(self, run_automaton_actions=True):
    """Stub out MCDI format loading and optionally prime the automaton.

    Records an expected db_util.load_mcdi_model("test_mcdi_type") call that
    returns a small test format. When requested, also switches mox to replay
    mode and feeds the header and the CDI type to the automaton under test.

    @param run_automaton_actions: If True, replay the recorded expectations
        and run parse_header / parse_cdi_type on the test automaton. If
        False, only the expectation is recorded.
    @type run_automaton_actions: bool
    """
    format_details = {
        "options": [
            {"value": 0},
            {"value": 1}
        ],
        "categories": [
            {"words": ["word1", "word2"]},
            {"words": ["word3", "word4"]}
        ],
        "count_as_spoken": [1]
    }

    self.mox.StubOutWithMock(db_util, "load_mcdi_model")
    expected_call = db_util.load_mcdi_model("test_mcdi_type")
    expected_call.AndReturn(models.MCDIFormat(
        "test MCDI type",
        "test_mcdi_type",
        "test_mcdi_type.yaml",
        format_details
    ))

    # Callers that want to record further expectations stop here.
    if not run_automaton_actions:
        return

    self.mox.ReplayAll()
    automaton = self.__test_automaton
    automaton.parse_header(CORRECT_TEST_HEADER_VALUES)
    automaton.parse_cdi_type("test_mcdi_type")
def generate_study_report_csv(snapshots_from_study, presentation_format):
    """Generate a CSV file for a set of snapshots with the same MCDI format.

    The MCDI format is looked up from the type name of the first snapshot
    (spaces removed, URL-quoted and lower-cased). If that lookup yields None,
    the DEFAULT_MCDI format is loaded instead. Every cell is converted to
    unicode and then encoded as ASCII, dropping characters that cannot be
    encoded.

    @param snapshots_from_study: The snapshots to create a CSV report for.
        Must support indexing and contain at least one snapshot because the
        first element is read to determine the MCDI type.
    @type snapshots_from_study: Sequence of models.SnapshotMetadata
    @param presentation_format: The presentation format to use to render the
        string serialization.
    @type presentation_format: models.PresentationFormat
    @return: In-memory file holding the CSV contents. No seek is performed
        here after the rows are written.
    @rtype: string_io.StringIO
    """
    faux_file = string_io.StringIO()
    csv_writer = csv.writer(faux_file)

    # Normalize the human readable type name into the key used for lookup.
    mcdi_type_name = snapshots_from_study[0].mcdi_type
    safe_mcdi_name = mcdi_type_name.replace(' ', '')
    safe_mcdi_name = urllib.quote_plus(safe_mcdi_name).lower()

    mcdi_format = db_util.load_mcdi_model(safe_mcdi_name)
    # Identity check: None is a singleton and a model may define its own ==.
    if mcdi_format is None:
        mcdi_format = db_util.load_mcdi_model(DEFAULT_MCDI)

    rows = generate_study_report_rows(snapshots_from_study, presentation_format)
    rows = sort_by_study_order(rows, mcdi_format)

    csv_writer.writerows(
        [[unicode(val).encode('ascii', 'ignore') for val in row]
        for row in rows]
    )
    return faux_file
def test_state_parse_mcdi_type_incorrect_words(self):
    """Check that a header word missing from the MCDI format is an error.

    The stubbed format lists word1 through word4, while the parsed header
    ends with "wor5". After parsing the CDI type the automaton is expected to
    be in STATE_FOUND_ERROR.
    """
    format_details = {
        "options": [
            {"value": 0},
            {"value": 1}
        ],
        "categories": [
            {"words": ["word1", "word2"]},
            {"words": ["word3", "word4"]}
        ],
        "count_as_spoken": [1]
    }
    header_with_bad_word = [
        "database id",
        "child id",
        "study id",
        "study",
        "gender",
        "age",
        "birthday",
        "session date",
        "session num",
        "total num sessions",
        "words spoken",
        "items excluded",
        "percentile",
        "extra categories",
        "revision",
        "languages",
        "num languages",
        "mcdi type",
        "hard of hearing",
        "deleted",
        "word1",
        "word2",
        "word3",
        "wor5"
    ]

    self.mox.StubOutWithMock(db_util, "load_mcdi_model")
    expected_call = db_util.load_mcdi_model("test_mcdi_type")
    expected_call.AndReturn(models.MCDIFormat(
        "test MCDI type",
        "test_mcdi_type",
        "test_mcdi_type.yaml",
        format_details
    ))
    self.mox.ReplayAll()

    automaton = self.__test_automaton
    automaton.parse_header(header_with_bad_word)
    automaton.parse_cdi_type("test_mcdi_type")

    resulting_state = automaton.get_state()
    self.assertEqual(resulting_state, new_csv_import_util.STATE_FOUND_ERROR)
def test_load_mcdi_model(self):
    """Check that CachedMCDIAdapter returns the model on repeated lookups.

    Only a single db_util.load_mcdi_model call is recorded, while the adapter
    is asked for the same format twice.
    """
    self.mox.StubOutWithMock(db_util, 'load_mcdi_model')
    db_util.load_mcdi_model('test_format').AndReturn(TEST_MCDI_MODEL)
    self.mox.ReplayAll()

    adapter = recalc_util.CachedMCDIAdapter()
    first_result = adapter.load_mcdi_model('test_format')
    second_result = adapter.load_mcdi_model('test_format')

    for result in (first_result, second_result):
        self.assertEqual(result, TEST_MCDI_MODEL)
def test_state_parse_mcdi_type_invalid(self):
    """Check that an MCDI type which cannot be loaded is an error.

    db_util.load_mcdi_model is stubbed to return None for the requested type;
    the automaton is then expected to end up in STATE_FOUND_ERROR.
    """
    unknown_type = "other_test_mcdi_type"

    self.mox.StubOutWithMock(db_util, "load_mcdi_model")
    db_util.load_mcdi_model(unknown_type).AndReturn(None)
    self.mox.ReplayAll()

    automaton = self.__test_automaton
    automaton.parse_header(CORRECT_TEST_HEADER_VALUES)
    automaton.parse_cdi_type(unknown_type)

    resulting_state = automaton.get_state()
    self.assertEqual(resulting_state, new_csv_import_util.STATE_FOUND_ERROR)
def load_mcdi_model(self, type_name):
    """Load an MCDI format by name, reusing previously loaded results.

    @param type_name: The name of the MCDI format to load.
    @return: The entry already stored in self.mcdi_models for type_name if
        there is one; otherwise the result of db_util.load_mcdi_model, which
        is stored in self.mcdi_models before being returned.
    """
    cache = self.mcdi_models
    if type_name not in cache:
        # First request for this name: ask db_util and remember the answer.
        cache[type_name] = db_util.load_mcdi_model(type_name)
    return cache[type_name]
def summarize_snapshots(snapshot_metas):
    """Map each word to the earliest session date on which it was spoken.

    For every snapshot the MCDI format is loaded (once per distinct
    mcdi_type) to find which values count as "spoken". Each word in the
    snapshot contents is then recorded with the smallest session_date among
    the snapshots where its value counts as spoken. Words that were seen but
    never counted as spoken map to None.

    @param snapshot_metas: The snapshots to summarize. Each needs mcdi_type
        and session_date attributes.
    @type snapshot_metas: Iterable over models.SnapshotMetadata
    @return: Mapping from word to earliest spoken session date or None.
    @rtype: dict
    """
    cdi_spoken_set = {}
    ret_serialization = {}

    for meta in snapshot_metas:
        mcdi_name = meta.mcdi_type
        cdi_date = meta.session_date

        # Get the values that count as "spoken", loading each format once
        if mcdi_name not in cdi_spoken_set:
            mcdi_info = db_util.load_mcdi_model(mcdi_name)
            cdi_spoken_set[mcdi_name] = mcdi_info.details['count_as_spoken']
        words_spoken_set = cdi_spoken_set[mcdi_name]

        # Parse the words
        for word_info in db_util.load_snapshot_contents(meta):
            word = word_info.word

            if word_info.value in words_spoken_set:
                # None covers both "never seen" and "seen but not spoken";
                # otherwise keep whichever date is earlier.
                earliest = ret_serialization.get(word)
                if earliest is None or earliest > cdi_date:
                    ret_serialization[word] = cdi_date
            elif word not in ret_serialization:
                # Report not known if not already reported
                ret_serialization[word] = None

    return ret_serialization
def parse_csv(contents, mcdi_type, languages, hard_of_hearing,
    act_as_file=False):
    """Parse CSV contents into snapshots and save them to the database.

    Loads the percentile tables named by the MCDI format for each gender,
    parses the CSV into snapshot prototypes and builds a snapshot for every
    prototype using a single database connection. The connection is always
    closed, including when parsing or building a snapshot raises.

    @param contents: The CSV contents, passed through to
        parse_csv_prototypes.
    @param mcdi_type: Name of the MCDI format the snapshots use.
    @param languages: Passed through to build_snapshot.
    @param hard_of_hearing: Passed through to build_snapshot.
    @param act_as_file: Passed through to parse_csv_prototypes.
    @return: {'error': <error>} if parse_csv_prototypes reports an error,
        otherwise {'error': None, 'ids': <child_id of every prototype>}.
    @rtype: dict
    """
    mcdi_model = db_util.load_mcdi_model(mcdi_type)

    percentile_names = mcdi_model.details['percentiles']
    male_percentiles_name = percentile_names['male']
    female_percentiles_name = percentile_names['female']
    other_percentiles_name = percentile_names['other']

    male_percentiles = db_util.load_percentile_model(male_percentiles_name)
    female_percentiles = db_util.load_percentile_model(female_percentiles_name)
    other_percentiles = db_util.load_percentile_model(other_percentiles_name)

    percentile_tables = {
        constants.MALE: male_percentiles,
        constants.FEMALE: female_percentiles,
        constants.OTHER_GENDER: other_percentiles
    }

    connection = db_util.get_db_connection()
    try:
        cursor = connection.cursor()

        parse_info = parse_csv_prototypes(contents, percentile_tables,
            act_as_file)

        if parse_info['error']:
            connection.commit()
            return {'error': parse_info['error']}

        prototypes = parse_info['prototypes']
        ids = [prototype['child_id'] for prototype in prototypes]

        for prototype in prototypes:
            build_snapshot(prototype, mcdi_type, languages, hard_of_hearing,
                cursor)

        connection.commit()
        return {'error': None, 'ids': ids}
    finally:
        # Release the connection even if parsing or snapshot building raised;
        # in that case nothing is committed here.
        connection.close()
def test_summarize_snapshots(self):
    """Check the earliest-spoken-date summary across three snapshots.

    Two snapshots use 'mcdi_type_1' (values 1 and 2 count as spoken) and one
    uses 'mcdi_type_2' (only value 1 counts as spoken). Each word is expected
    to map to the first session date on which it counted as spoken, and
    'word4' to None.
    """
    def make_snapshot(mcdi_type, session_date):
        snapshot = TEST_SNAPSHOT.clone()
        snapshot.mcdi_type = mcdi_type
        snapshot.session_date = session_date
        return snapshot

    first_snapshot = make_snapshot('mcdi_type_1', '2015/01/01')
    second_snapshot = make_snapshot('mcdi_type_1', '2015/02/01')
    third_snapshot = make_snapshot('mcdi_type_2', '2015/03/01')
    test_metadata = [first_snapshot, second_snapshot, third_snapshot]

    first_contents = [
        models.SnapshotContent(0, 'word1', 1, 1),
        models.SnapshotContent(0, 'word2', 0, 1),
        models.SnapshotContent(0, 'word3', 0, 1)
    ]
    second_contents = [
        models.SnapshotContent(0, 'word1', 1, 1),
        models.SnapshotContent(0, 'word2', 2, 1),
        models.SnapshotContent(0, 'word3', 0, 1)
    ]
    third_contents = [
        models.SnapshotContent(0, 'word1', 1, 1),
        models.SnapshotContent(0, 'word2', 1, 1),
        models.SnapshotContent(0, 'word3', 1, 1),
        models.SnapshotContent(0, 'word4', 2, 1)
    ]

    self.mox.StubOutWithMock(db_util, 'load_mcdi_model')
    self.mox.StubOutWithMock(db_util, 'load_snapshot_contents')

    # Expectations are recorded in the order the calls are expected: the
    # format for 'mcdi_type_1' is only requested once for two snapshots.
    format_one = models.MCDIFormat('', '', '', {'count_as_spoken': [1, 2]})
    db_util.load_mcdi_model('mcdi_type_1').AndReturn(format_one)
    db_util.load_snapshot_contents(test_metadata[0]).AndReturn(first_contents)
    db_util.load_snapshot_contents(test_metadata[1]).AndReturn(second_contents)

    format_two = models.MCDIFormat('', '', '', {'count_as_spoken': [1]})
    db_util.load_mcdi_model('mcdi_type_2').AndReturn(format_two)
    db_util.load_snapshot_contents(test_metadata[2]).AndReturn(third_contents)

    self.mox.ReplayAll()

    serialization = report_util.summarize_snapshots(test_metadata)

    expected_dates = (
        ('word1', '2015/01/01'),
        ('word2', '2015/02/01'),
        ('word3', '2015/03/01'),
        ('word4', None)
    )
    for word, expected_date in expected_dates:
        self.assertEqual(serialization[word], expected_date)