def test_yes_no_amb(self):
        """Check FoldStrategies.yes_no_amb against a table of (first, second, expected) codes."""
        expected_folds = [
            (Codes.YES, Codes.YES, Codes.YES),
            (Codes.NO, Codes.NO, Codes.NO),
            (Codes.YES, Codes.NO, Codes.AMBIVALENT),
            (Codes.NOT_CODED, Codes.NOT_CODED, Codes.NOT_CODED),
            # TODO: Check that this test case is desired
            (Codes.NOT_REVIEWED, Codes.YES, Codes.YES),
        ]

        # Cases run in order and stop at the first failing assertion
        for first, second, expected in expected_folds:
            self.assertEqual(FoldStrategies.yes_no_amb(first, second), expected)
    def test_assert_equal(self):
        """Check FoldStrategies.assert_equal on equal and on differing values.

        Equal inputs are expected to be returned unchanged; differing inputs are expected to raise an
        AssertionError whose message reports both values.
        """
        self.assertEqual(FoldStrategies.assert_equal("5", "5"), "5")

        # assertRaises fails the test on its own when nothing is raised, so no sentinel
        # failure message has to be raised and then filtered out again in an except block.
        with self.assertRaises(AssertionError) as raised:
            FoldStrategies.assert_equal("6", "7")

        self.assertEqual(str(raised.exception),
                         "Values should be the same but are different "
                         "(differing values were '6' and '7')")
Пример #3
0
def get_follow_up_coding_plans(pipeline_name):
    """Build the coding plans for the two s09 follow-up questions.

    The "have voice" plan uses a single-coded configuration with a yes/no cleaner; the "suggestions"
    plan uses a multiple-coded configuration folded with FoldStrategies.list_of_labels.

    :param pipeline_name: Not read by this function.
    :return: List of two CodingPlan objects: have_voice first, then suggestions.
    """
    # --- s09 have voice ---
    have_voice_configuration = CodingConfiguration(
        coding_mode=CodingModes.SINGLE,
        code_scheme=CodeSchemes.S09_HAVE_VOICE,
        cleaner=somali.DemographicCleaner.clean_yes_no,
        coded_field="s09_have_voice_coded",
        analysis_file_key="s09_have_voice",
        fold_strategy=FoldStrategies.assert_label_ids_equal
    )
    have_voice_plan = CodingPlan(
        raw_field="s09_have_voice_raw",
        time_field="s09_have_voice_time",
        coda_filename="TIS_Plus_s09_have_voice.json",
        coding_configurations=[have_voice_configuration],
        ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("have voice"),
        raw_field_fold_strategy=FoldStrategies.assert_equal
    )

    # --- s09 suggestions ---
    suggestions_configuration = CodingConfiguration(
        coding_mode=CodingModes.MULTIPLE,
        code_scheme=CodeSchemes.S09_SUGGESTIONS,
        coded_field="s09_suggestions_coded",
        analysis_file_key="s09_suggestions",
        fold_strategy=lambda x, y: FoldStrategies.list_of_labels(CodeSchemes.S09_SUGGESTIONS, x, y)
    )
    suggestions_plan = CodingPlan(
        raw_field="s09_suggestions_raw",
        time_field="s09_suggestions_time",
        coda_filename="TIS_Plus_s09_suggestions.json",
        coding_configurations=[suggestions_configuration],
        ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("suggestions"),
        raw_field_fold_strategy=FoldStrategies.assert_equal
    )

    return [have_voice_plan, suggestions_plan]
 def test_concatenate(self):
     """Check FoldStrategies.concatenate for string, empty-string and None inputs."""
     # (first, second, expected): empty strings still get a ';' separator, None inputs do not
     expected_results = [
         ("abc", "def", "abc;def"),
         ("abc", "", "abc;"),
         ("abc", None, "abc"),
         ("", "def", ";def"),
         (None, "def", "def"),
         (None, None, None),
     ]
     for first, second, expected in expected_results:
         self.assertEqual(FoldStrategies.concatenate(first, second), expected)
def get_follow_up_coding_plans(pipeline_name):
    """Build the coding plans for the OXFAM WASH s01 follow-up questions.

    Both plans are multiple-coded, fold their labels with FoldStrategies.list_of_labels and fold their
    raw text with FoldStrategies.concatenate. Only the accountability plan sets a run_id_field.

    :param pipeline_name: Not read by this function.
    :return: List of two CodingPlan objects: programme evaluation first, then accountability.
    """
    # --- s01 programme evaluation (no run_id_field) ---
    programme_evaluation_configuration = CodingConfiguration(
        coding_mode=CodingModes.MULTIPLE,
        code_scheme=CodeSchemes.S01_PROGRAMME_EVALUATION,
        coded_field="rqa_s01_programme_evaluation_coded",
        analysis_file_key="rqa_s01_programme_evaluation",
        fold_strategy=lambda x, y: FoldStrategies.list_of_labels(CodeSchemes.S01_PROGRAMME_EVALUATION, x, y)
    )
    programme_evaluation_plan = CodingPlan(
        raw_field="rqa_s01_programme_evaluation_raw",
        time_field="sent_on",
        coda_filename="OXFAM_WASH_s01_Programme_Evaluation.json",
        icr_filename="oxfam_programme_evaluation.csv",
        coding_configurations=[programme_evaluation_configuration],
        ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("OXFAM WASH s01 Programme Evaluation"),
        raw_field_fold_strategy=FoldStrategies.concatenate
    )

    # --- s01 accountability ---
    accountability_configuration = CodingConfiguration(
        coding_mode=CodingModes.MULTIPLE,
        code_scheme=CodeSchemes.S01_ACCOUNTABILITY,
        coded_field="rqa_s01_accountability_coded",
        analysis_file_key="rqa_s01_accountability",
        fold_strategy=lambda x, y: FoldStrategies.list_of_labels(CodeSchemes.S01_ACCOUNTABILITY, x, y)
    )
    accountability_plan = CodingPlan(
        raw_field="rqa_s01_accountability_raw",
        time_field="sent_on",
        coda_filename="OXFAM_WASH_s01_Accountability.json",
        run_id_field="rqa_s01_accountability_run_id",
        icr_filename="oxfam_accountability.csv",
        coding_configurations=[accountability_configuration],
        ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("OXFAM WASH s01 Accountability"),
        raw_field_fold_strategy=FoldStrategies.concatenate
    )

    return [programme_evaluation_plan, accountability_plan]
Пример #6
0
def get_rqa_coding_plans(pipeline_name):
    """Build the single RQA coding plan (s01e01) for the TEST_PIPELINE_DANIEL dataset.

    :param pipeline_name: Not read by this function.
    :return: List containing one CodingPlan.
    """
    s01e01_configuration = CodingConfiguration(
        coding_mode=CodingModes.MULTIPLE,
        code_scheme=CodeSchemes.S01E01,
        coded_field="rqa_s01e01_coded",
        analysis_file_key="rqa_s01e01",
        fold_strategy=lambda x, y: FoldStrategies.list_of_labels(CodeSchemes.S01E01, x, y)
    )

    s01e01_plan = CodingPlan(
        raw_field="rqa_s01e01_raw",
        time_field="sent_on",
        run_id_field="rqa_s01e01_run_id",
        coda_filename="TEST_PIPELINE_DANIEL_s01e01.json",
        icr_filename="s01e01.csv",
        coding_configurations=[s01e01_configuration],
        ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("s01e01"),
        raw_field_fold_strategy=FoldStrategies.concatenate
    )

    return [s01e01_plan]
    def test_assert_label_ids_equal(self):
        """Check FoldStrategies.assert_label_ids_equal on matching and on differing label ids.

        When both labels share a SchemeID and CodeID the first label is expected back, even if their
        timestamps and origins differ. A mismatch in either id is expected to raise an AssertionError
        whose message reports both id pairs.
        """
        def label(scheme_id, code_id, timestamp="2019-10-01T12:20:14Z", origin=("x", "test", "automatic")):
            # Serialised label dict, built fresh on every call so no objects are shared between cases
            return Label(scheme_id, code_id, timestamp, Origin(*origin)).to_dict()

        # Identical labels fold to that label
        self.assertEqual(
            FoldStrategies.assert_label_ids_equal(label("scheme-1", "code-2"), label("scheme-1", "code-2")),
            label("scheme-1", "code-2")
        )

        # Same ids but a different timestamp/origin: the first label is the one returned
        self.assertEqual(
            FoldStrategies.assert_label_ids_equal(
                label("scheme-1", "code-2"),
                label("scheme-1", "code-2", "2019-10-14T12:20:14Z", ("y", "test-2", "manual"))
            ),
            label("scheme-1", "code-2")
        )

        # Differing CodeID. assertRaises fails the test by itself if nothing is raised.
        with self.assertRaises(AssertionError) as raised:
            FoldStrategies.assert_label_ids_equal(label("scheme-1", "code-1"), label("scheme-1", "code-2"))
        self.assertEqual(str(raised.exception),
                         "Labels should have the same SchemeID and CodeID, but at least one of those is different "
                         "(differing values were {'SchemeID': 'scheme-1', 'CodeID': 'code-1'} "
                         "and {'SchemeID': 'scheme-1', 'CodeID': 'code-2'})")

        # Differing SchemeID
        with self.assertRaises(AssertionError) as raised:
            FoldStrategies.assert_label_ids_equal(label("scheme-1", "code-2"), label("scheme-2", "code-2"))
        self.assertEqual(str(raised.exception),
                         "Labels should have the same SchemeID and CodeID, but at least one of those is different "
                         "(differing values were {'SchemeID': 'scheme-1', 'CodeID': 'code-2'} "
                         "and {'SchemeID': 'scheme-2', 'CodeID': 'code-2'})")
def get_rqa_coding_plans(pipeline_name):
    """Build the RQA coding plans for the UNICEF COVID19 SOM datasets.

    Every plan is multiple-coded, reads its time from "sent_on", folds its labels with
    FoldStrategies.list_of_labels and folds its raw text with FoldStrategies.concatenate.

    :param pipeline_name: Not read by this function.
    :return: List of six CodingPlan objects, in the order s01e01-s01e04, csap_kalkaal_consent, s01_closeout.
    """
    def build_plan(field_stem, dataset_stem, code_scheme, ws_match_value, **extra_plan_kwargs):
        # field_stem prefixes the raw/run_id/coded fields and names the icr file and analysis key;
        # dataset_stem is the suffix of the Coda filename.
        configuration = CodingConfiguration(
            coding_mode=CodingModes.MULTIPLE,
            code_scheme=code_scheme,
            coded_field=field_stem + "_coded",
            analysis_file_key=field_stem,
            fold_strategy=lambda x, y: FoldStrategies.list_of_labels(code_scheme, x, y)
        )
        return CodingPlan(
            raw_field=field_stem + "_raw",
            time_field="sent_on",
            run_id_field=field_stem + "_run_id",
            coda_filename="UNICEF_COVID19_SOM_" + dataset_stem + ".json",
            icr_filename=field_stem + ".csv",
            coding_configurations=[configuration],
            ws_code=CodeSchemes.WS_CORRECT_DATASET_SCHEME.get_code_with_match_value(ws_match_value),
            raw_field_fold_strategy=FoldStrategies.concatenate,
            **extra_plan_kwargs
        )

    return [
        build_plan("rqa_s01e01", "s01e01", CodeSchemes.S01E01, "s01e01"),
        build_plan("rqa_s01e02", "s01e02", CodeSchemes.S01E02, "s01e02"),
        # s01e03 is the only plan that also sets katikati_survey_time_ranges
        build_plan("rqa_s01e03", "s01e03", CodeSchemes.S01E03, "s01e03",
                   katikati_survey_time_ranges=[("2020-07-24T14:00:00+03:00", "2020-07-26T00:00:00+03:00")]),
        build_plan("rqa_s01e04", "s01e04", CodeSchemes.S01E04, "s01e04"),
        build_plan("csap_kalkaal_consent", "csap_kalkaal_consent", CodeSchemes.CSAP_KALKAAL_CONSENT,
                   "csap kalkaal consent"),
        build_plan("s01_closeout", "s01_closeout", CodeSchemes.S01_CLOSEOUT, "s01 closeout")
    ]
    def test_fold_list_of_labels(self):
        """Exercise FoldStrategies.list_of_labels with NA, NR, NC and normal labels.

        Covers rejected inputs (empty lists, NA mixed with other labels, labels from another scheme)
        and the expected fold results for combinations of control and normal labels.
        """
        def make_label(code_id, timestamp):
            # Every label in this test belongs to scheme-1 and is built with the same origin values
            return Label("scheme-1", code_id, timestamp, Origin("x", "test", "automatic")).to_dict()

        def fold(first, second):
            return FoldStrategies.list_of_labels(scheme_1, first, second)

        def assert_rejected(scheme, first, second):
            with self.assertRaises(AssertionError):
                FoldStrategies.list_of_labels(scheme, first, second)

        control_codes = [
            Code("code-NA", "Control", "NA", -10, "NA", True, control_code=Codes.TRUE_MISSING),
            Code("code-NR", "Control", "NR", -20, "NR", True, control_code=Codes.NOT_REVIEWED),
            Code("code-NC", "Control", "NC", -30, "NC", True, control_code=Codes.NOT_CODED),
        ]
        normal_codes = [
            Code("code-normal-1", "Normal", "Normal 1", 1, "normal_1", True),
            Code("code-normal-2", "Normal", "Normal 2", 2, "normal_2", True),
        ]
        scheme_1 = CodeScheme("scheme-1", "Scheme 1", "1", control_codes + normal_codes)
        # A second scheme that contains none of the codes used by the labels below
        scheme_2 = CodeScheme("scheme-2", "Scheme 2", "2", [])

        na_label = make_label("code-NA", "2019-10-01T12:20:14Z")
        nr_label = make_label("code-NR", "2019-10-01T12:25:18Z")
        nc_label = make_label("code-NC", "2019-10-01T12:30:00Z")
        na_label_2 = make_label("code-NA", "2019-10-01T13:00:00Z")
        normal_1_label = make_label("code-normal-1", "2019-10-01T12:20:14Z")
        normal_1_label_2 = make_label("code-normal-1", "2019-10-03T00:00:00Z")
        normal_2_label = make_label("code-normal-2", "2019-10-01T15:00:00Z")

        # Empty label lists are rejected
        assert_rejected(scheme_1, [], [])
        assert_rejected(scheme_1, [na_label], [])

        # Two NA-only lists fold to a single NA label (the first one)
        self.assertEqual(fold([na_label], [na_label]), [na_label])
        self.assertEqual(fold([na_label], [na_label_2]), [na_label])

        # A list holding NA alongside any other label (even a second NA) is rejected
        assert_rejected(scheme_1, [na_label, na_label], [na_label])
        assert_rejected(scheme_1, [na_label, normal_1_label], [na_label])

        # NA folded with a normal label gives the normal label
        self.assertEqual(fold([na_label], [normal_1_label]), [normal_1_label])

        # Combinations of normal labels only
        self.assertEqual(fold([normal_1_label], [normal_1_label]), [normal_1_label])
        self.assertEqual(fold([normal_1_label, normal_2_label], [normal_1_label]),
                         [normal_1_label, normal_2_label])
        self.assertEqual(fold([normal_1_label, normal_2_label], [normal_1_label_2]),
                         [normal_1_label, normal_2_label])

        # Normal labels folded with a control code that is neither NA nor NC
        self.assertEqual(fold([normal_1_label, normal_2_label], [nr_label]),
                         [normal_1_label, normal_2_label, nr_label])

        # Labels that do not belong to the given code scheme are rejected...
        assert_rejected(scheme_2, [normal_1_label], [na_label])
        # ...while the same inputs are accepted with the correct code scheme
        fold([normal_1_label], [na_label])

        # Folds involving NC labels
        self.assertEqual(fold([nc_label], [nc_label]), [nc_label])
        self.assertEqual(fold([na_label], [nc_label]), [nc_label])
        self.assertEqual(fold([normal_1_label], [nc_label]), [normal_1_label])
        self.assertEqual(fold([normal_1_label], [normal_2_label, nc_label]),
                         [normal_1_label, normal_2_label])
 def test_matrix(self):
     """Check FoldStrategies.matrix: the result is MATRIX_0 only when both inputs are MATRIX_0."""
     expected_folds = [
         (Codes.MATRIX_1, Codes.MATRIX_1, Codes.MATRIX_1),
         (Codes.MATRIX_0, Codes.MATRIX_1, Codes.MATRIX_1),
         (Codes.MATRIX_1, Codes.MATRIX_0, Codes.MATRIX_1),
         (Codes.MATRIX_0, Codes.MATRIX_0, Codes.MATRIX_0),
     ]
     for first, second, expected in expected_folds:
         self.assertEqual(FoldStrategies.matrix(first, second), expected)
def get_rqa_coding_plans(pipeline_name):
    """Build the RQA coding plans for the UNDP Kenya datasets.

    Every plan is multiple-coded, reads its time from "sent_on", folds its labels with
    FoldStrategies.list_of_labels and folds its raw text with FoldStrategies.concatenate.

    :param pipeline_name: Not read by this function.
    :return: List of nine CodingPlan objects: s01e01 through s01e08, then s01_close_out.
    """
    def build_plan(field_stem, dataset_stem, code_scheme, ws_dataset_name):
        # field_stem prefixes the raw/run_id/coded fields and is the analysis key;
        # dataset_stem names the Coda and icr files; ws_dataset_name completes the WS match value.
        configuration = CodingConfiguration(
            coding_mode=CodingModes.MULTIPLE,
            code_scheme=code_scheme,
            coded_field=field_stem + "_coded",
            analysis_file_key=field_stem,
            fold_strategy=lambda x, y: FoldStrategies.list_of_labels(code_scheme, x, y)
        )
        return CodingPlan(
            raw_field=field_stem + "_raw",
            time_field="sent_on",
            run_id_field=field_stem + "_run_id",
            coda_filename="UNDP_Kenya_" + dataset_stem + ".json",
            icr_filename=dataset_stem + ".csv",
            coding_configurations=[configuration],
            ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("UNDP-Kenya " + ws_dataset_name),
            raw_field_fold_strategy=FoldStrategies.concatenate
        )

    return [
        build_plan("rqa_s01e01", "s01e01", CodeSchemes.S01E01, "s01e01"),
        build_plan("rqa_s01e02", "s01e02", CodeSchemes.S01E02, "s01e02"),
        build_plan("rqa_s01e03", "s01e03", CodeSchemes.S01E03, "s01e03"),
        build_plan("rqa_s01e04", "s01e04", CodeSchemes.S01E04, "s01e04"),
        build_plan("rqa_s01e05", "s01e05", CodeSchemes.S01E05, "s01e05"),
        build_plan("rqa_s01e06", "s01e06", CodeSchemes.S01E06, "s01e06"),
        build_plan("rqa_s01e07", "s01e07", CodeSchemes.S01E07, "s01e07"),
        build_plan("rqa_s01e08", "s01e08", CodeSchemes.S01E08, "s01e08"),
        # The close-out plan has no "rqa_" prefix on its fields
        build_plan("s01_close_out", "s01_close_out", CodeSchemes.S01_CLOSE_OUT, "s01 close out")
    ]
Пример #12
0
        return Codes.NOT_CODED


S01_RQA_CODING_PLAN = [
        CodingPlan(raw_field="diagnostic_s01e01_raw",
                   time_field="sent_on",
                   run_id_field="diagnostic_s01e01_run_id",
                   coda_filename="COVID19_SOM_s01e01.json",
                   icr_filename="diagnostic_s01e01.csv",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.MULTIPLE,
                           code_scheme=CodeSchemes.DIAGNOSTIC_S01E01,
                           coded_field="diagnostic_s01e01_coded",
                           analysis_file_key="diagnostic_s01e01",
                           fold_strategy=lambda x, y: FoldStrategies.list_of_labels(CodeSchemes.DIAGNOSTIC_S01E01, x, y)
                       )
                   ],
                   ws_code=CodeSchemes.WS_CORRECT_DATASET_SCHEME.get_code_with_match_value("covid19 som s01e01"),
                   raw_field_fold_strategy=FoldStrategies.concatenate),

        CodingPlan(raw_field="diagnostic_s01e02_raw",
                   time_field="sent_on",
                   run_id_field="diagnostic_s01e02_run_id",
                   coda_filename="COVID19_SOM_s01e02.json",
                   icr_filename="diagnostic_s01e02.csv",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.MULTIPLE,
                           code_scheme=CodeSchemes.DIAGNOSTIC_S01E02,
                           coded_field="diagnostic_s01e02_coded",
 def test_boolean_or(self):
     """Check FoldStrategies.boolean_or: the result is TRUE when either input is TRUE."""
     expected_folds = [
         (Codes.TRUE, Codes.TRUE, Codes.TRUE),
         (Codes.FALSE, Codes.TRUE, Codes.TRUE),
         (Codes.FALSE, Codes.FALSE, Codes.FALSE),
     ]
     for first, second, expected in expected_folds:
         self.assertEqual(FoldStrategies.boolean_or(first, second), expected)
 def test_control_code(self):
     """Check which control code FoldStrategies.control_code_by_precedence keeps for two input pairs."""
     # (first, second, expected): NOT_CODED wins over TRUE_MISSING, STOP wins over NOT_CODED
     expected_folds = [
         (Codes.TRUE_MISSING, Codes.NOT_CODED, Codes.NOT_CODED),
         (Codes.STOP, Codes.NOT_CODED, Codes.STOP),
     ]
     for first, second, expected in expected_folds:
         self.assertEqual(FoldStrategies.control_code_by_precedence(first, second), expected)
def get_rqa_coding_plans(pipeline_name):
    """
    Builds the RQA coding plans for the s01 pilot followed by episodes s01e01 to s01e06.

    Every plan is multi-coded with a single coding configuration, reads its timestamp from "sent_on",
    folds labels with FoldStrategies.list_of_labels and folds raw text with FoldStrategies.concatenate.

    :param pipeline_name: Not read by this function; the same plans are returned for any value.
    :return: The coding plans, in the order pilot, s01e01, ..., s01e06.
    :rtype: list of CodingPlan
    """
    def build_plan(dataset, code_scheme, coda_filename, ws_match_value):
        # All field names of a plan are derived from its dataset name; only the Coda filename and the
        # WS match value have to be given explicitly because the pilot does not follow the episode pattern.
        labels_config = CodingConfiguration(
            coding_mode=CodingModes.MULTIPLE,
            code_scheme=code_scheme,
            coded_field=f"rqa_{dataset}_coded",
            analysis_file_key=f"rqa_{dataset}_",
            fold_strategy=lambda x, y: FoldStrategies.list_of_labels(code_scheme, x, y))

        return CodingPlan(
            raw_field=f"rqa_{dataset}_raw",
            time_field="sent_on",
            run_id_field=f"rqa_{dataset}_run_id",
            coda_filename=coda_filename,
            icr_filename=f"{dataset}.csv",
            coding_configurations=[labels_config],
            ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value(ws_match_value),
            raw_field_fold_strategy=FoldStrategies.concatenate)

    # (dataset, code scheme, Coda filename, WS match value)
    plan_specs = [
        ("s01_pilot", CodeSchemes.S01_PILOT, "COVID19_s01e01.json", "COVID19 s01e01"),
        ("s01e01", CodeSchemes.S01E01, "COVID19_KE_Urban_s01e01.json", "COVID19-KE-Urban s01e01"),
        ("s01e02", CodeSchemes.S01E02, "COVID19_KE_Urban_s01e02.json", "COVID19-KE-Urban s01e02"),
        ("s01e03", CodeSchemes.S01E03, "COVID19_KE_Urban_s01e03.json", "COVID19-KE-Urban s01e03"),
        ("s01e04", CodeSchemes.S01E04, "COVID19_KE_Urban_s01e04.json", "COVID19-KE-Urban s01e04"),
        ("s01e05", CodeSchemes.S01E05, "COVID19_KE_Urban_s01e05.json", "COVID19-KE-Urban s01e05"),
        ("s01e06", CodeSchemes.S01E06, "COVID19_KE_Urban_s01e06.json", "COVID19-KE-Urban s01e06"),
    ]

    return [build_plan(*spec) for spec in plan_specs]
def get_rqa_coding_plans(pipeline_name):
    """
    Builds the RQA coding plans for episodes s01e01 to s01e03 followed by the s01 close-out.

    Every plan is multi-coded with a single coding configuration, reads its timestamp from "sent_on",
    folds labels with FoldStrategies.list_of_labels and folds raw text with FoldStrategies.concatenate.

    :param pipeline_name: Not read by this function; the same plans are returned for any value.
    :return: The coding plans, in the order s01e01, s01e02, s01e03, s01_close_out.
    :rtype: list of CodingPlan
    """
    def build_plan(field_prefix, dataset, code_scheme, ws_match_value):
        # `field_prefix` names the raw/coded/run-id fields, `dataset` names the Coda and ICR files.
        # They are separate because the episode fields carry an "rqa_" prefix and the close-out fields do not.
        labels_config = CodingConfiguration(
            coding_mode=CodingModes.MULTIPLE,
            code_scheme=code_scheme,
            coded_field=f"{field_prefix}_coded",
            analysis_file_key=f"{field_prefix}_",
            fold_strategy=lambda x, y: FoldStrategies.list_of_labels(code_scheme, x, y))

        return CodingPlan(
            raw_field=f"{field_prefix}_raw",
            time_field="sent_on",
            run_id_field=f"{field_prefix}_run_id",
            coda_filename=f"WorldVision_{dataset}.json",
            icr_filename=f"{dataset}.csv",
            coding_configurations=[labels_config],
            ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value(ws_match_value),
            raw_field_fold_strategy=FoldStrategies.concatenate)

    episode_plans = [
        build_plan("rqa_s01e01", "s01e01", CodeSchemes.S01E01, "s01e01"),
        build_plan("rqa_s01e02", "s01e02", CodeSchemes.S01E02, "s01e02"),
        build_plan("rqa_s01e03", "s01e03", CodeSchemes.S01E03, "s01e03"),
    ]
    close_out_plan = build_plan("s01_close_out", "s01_close_out", CodeSchemes.S01_CLOSE_OUT, "s01 close-out")

    return episode_plans + [close_out_plan]
class PipelineConfiguration(object):
    """
    Holds the coding plans and the run-time settings of a pipeline.

    The coding plans (RQA_CODING_PLANS, SURVEY_CODING_PLANS) are class-level constants; everything else is
    supplied per instance, typically via `from_configuration_dict` / `from_configuration_file`.
    """

    RQA_CODING_PLANS = [
        CodingPlan(raw_field="rqa_s06e01_raw",
                   time_field="sent_on",
                   run_id_field="rqa_s06e01_run_id",
                   coda_filename="s06e01.json",
                   icr_filename="s06e01.csv",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.MULTIPLE,
                           code_scheme=CodeSchemes.S06E01_REASONS,
                           coded_field="rqa_s06e01_coded",
                           analysis_file_key="rqa_s06e01_",
                           fold_strategy=lambda x, y: FoldStrategies.list_of_labels(CodeSchemes.S06E01_REASONS, x, y)
                       )
                   ],
                   ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("s06e01"),
                   raw_field_fold_strategy=FoldStrategies.concatenate),

        CodingPlan(raw_field="rqa_s06e02_raw",
                   time_field="sent_on",
                   run_id_field="rqa_s06e02_run_id",
                   coda_filename="s06e02.json",
                   icr_filename="s06e02.csv",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.MULTIPLE,
                           code_scheme=CodeSchemes.S06E02_REASONS,
                           coded_field="rqa_s06e02_coded",
                           analysis_file_key="rqa_s06e02_",
                           fold_strategy=lambda x, y: FoldStrategies.list_of_labels(CodeSchemes.S06E02_REASONS, x, y)
                       )
                   ],
                   ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("s06e02"),
                   raw_field_fold_strategy=FoldStrategies.concatenate),
    ]

    @staticmethod
    def clean_age_with_range_filter(text):
        """
        Cleans age from the given `text`, setting to NC if the cleaned age is not in the range 10 <= age < 100.

        :param text: Text to clean an age from.
        :return: The cleaned age converted to a string if it is an int in range, otherwise Codes.NOT_CODED.
        """
        age = swahili.DemographicCleaner.clean_age(text)
        # TODO: Once the cleaners are updated to not return Codes.NOT_CODED, this should be updated to still return
        #       NC in the case where age is an int but is out of range
        if isinstance(age, int) and 10 <= age < 100:
            return str(age)
        return Codes.NOT_CODED

    @staticmethod
    def clean_district_if_no_mogadishu_sub_district(text):
        """
        Cleans a Somalia district from the given `text`, but only if no Mogadishu sub-district can be cleaned
        from that same text.

        :param text: Text to clean a district from.
        :return: The result of the Somalia district cleaner if the Mogadishu sub-district cleaner returned
                 Codes.NOT_CODED, otherwise Codes.NOT_CODED.
        """
        mogadishu_sub_district = somali.DemographicCleaner.clean_mogadishu_sub_district(text)
        if mogadishu_sub_district == Codes.NOT_CODED:
            return somali.DemographicCleaner.clean_somalia_district(text)
        else:
            return Codes.NOT_CODED

    # Note: the cleaners below that refer to the static methods above are wrapped in lambdas because the name
    # `PipelineConfiguration` is not bound until the class body has finished executing.
    SURVEY_CODING_PLANS = [
        CodingPlan(raw_field="operator_raw",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.SOMALIA_OPERATOR,
                           coded_field="operator_coded",
                           analysis_file_key="operator",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       )
                   ],
                   raw_field_fold_strategy=FoldStrategies.assert_equal),

        CodingPlan(raw_field="location_raw",
                   time_field="location_time",
                   coda_filename="location.json",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.MOGADISHU_SUB_DISTRICT,
                           cleaner=somali.DemographicCleaner.clean_mogadishu_sub_district,
                           coded_field="mogadishu_sub_district_coded",
                           # This code exists for compatibility with the previous CSAP demog datasets.
                           # Not including in the analysis file because this project is not in Mogadishu.
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       ),
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.SOMALIA_DISTRICT,
                           cleaner=lambda text: PipelineConfiguration.clean_district_if_no_mogadishu_sub_district(text),
                           coded_field="district_coded",
                           analysis_file_key="district",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       ),
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.SOMALIA_REGION,
                           coded_field="region_coded",
                           analysis_file_key="region",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       ),
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.SOMALIA_STATE,
                           coded_field="state_coded",
                           analysis_file_key="state",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       ),
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.SOMALIA_ZONE,
                           coded_field="zone_coded",
                           analysis_file_key="zone",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       )
                   ],
                   code_imputation_function=code_imputation_functions.impute_somalia_location_codes,
                   ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("location"),
                   raw_field_fold_strategy=FoldStrategies.assert_equal),

        CodingPlan(raw_field="gender_raw",
                   time_field="gender_time",
                   coda_filename="gender.json",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.GENDER,
                           cleaner=somali.DemographicCleaner.clean_gender,
                           coded_field="gender_coded",
                           analysis_file_key="gender",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       )
                   ],
                   ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("gender"),
                   raw_field_fold_strategy=FoldStrategies.assert_equal),

        CodingPlan(raw_field="age_raw",
                   time_field="age_time",
                   coda_filename="age.json",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.AGE,
                           cleaner=lambda text: PipelineConfiguration.clean_age_with_range_filter(text),
                           coded_field="age_coded",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       ),
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.AGE_CATEGORY,
                           coded_field="age_category_coded",
                           analysis_file_key="age_category",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       )
                   ],
                   code_imputation_function=code_imputation_functions.impute_age_category,
                   ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("age"),
                   raw_field_fold_strategy=FoldStrategies.assert_equal),

        CodingPlan(raw_field="recently_displaced_raw",
                   time_field="recently_displaced_time",
                   coda_filename="recently_displaced.json",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.RECENTLY_DISPLACED,
                           cleaner=somali.DemographicCleaner.clean_yes_no,
                           coded_field="recently_displaced_coded",
                           analysis_file_key="recently_displaced",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       )
                   ],
                   ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("recently displaced"),
                   raw_field_fold_strategy=FoldStrategies.assert_equal),

        CodingPlan(raw_field="in_idp_camp_raw",
                   time_field="in_idp_camp_time",
                   coda_filename="in_idp_camp.json",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.IN_IDP_CAMP,
                           cleaner=somali.DemographicCleaner.clean_yes_no,
                           coded_field="in_idp_camp_coded",
                           analysis_file_key="in_idp_camp",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       )
                   ],
                   ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("in idp camp"),
                   raw_field_fold_strategy=FoldStrategies.assert_equal),

        CodingPlan(raw_field="have_voice_raw",
                   time_field="have_voice_time",
                   coda_filename="have_voice.json",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.SINGLE,
                           code_scheme=CodeSchemes.HAVE_VOICE,
                           cleaner=somali.DemographicCleaner.clean_yes_no,
                           coded_field="have_voice_coded",
                           analysis_file_key="have_voice",
                           fold_strategy=FoldStrategies.assert_label_ids_equal
                       )
                   ],
                   ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("have voice"),
                   raw_field_fold_strategy=FoldStrategies.assert_equal),

        CodingPlan(raw_field="suggestions_raw",
                   time_field="suggestions_time",
                   coda_filename="suggestions.json",
                   coding_configurations=[
                       CodingConfiguration(
                           coding_mode=CodingModes.MULTIPLE,
                           code_scheme=CodeSchemes.SUGGESTIONS,
                           coded_field="suggestions_coded",
                           analysis_file_key="suggestions_",
                           fold_strategy=lambda x, y: FoldStrategies.list_of_labels(CodeSchemes.SUGGESTIONS, x, y)
                       )
                   ],
                   ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value("suggestions"),
                   raw_field_fold_strategy=FoldStrategies.assert_equal)
    ]

    def __init__(self, pipeline_name, raw_data_sources, phone_number_uuid_table, timestamp_remappings,
                 rapid_pro_key_remappings, project_start_date, project_end_date, filter_test_messages, move_ws_messages,
                 memory_profile_upload_url_prefix, data_archive_upload_url_prefix, drive_upload=None):
        """
        Stores the given settings on the instance, then runs `validate()` on them.

        :param pipeline_name: The name of this pipeline.
        :type pipeline_name: str
        :param raw_data_sources: List of sources to pull the various raw run files from.
        :type raw_data_sources: list of RawDataSource
        :param phone_number_uuid_table: Configuration for the Firestore phone number <-> uuid table.
        :type phone_number_uuid_table: PhoneNumberUuidTable
        :param timestamp_remappings: List of timestamp remappings.
                                     NOTE(review): how these are applied is not visible here - confirm with the
                                     code that consumes this configuration.
        :type timestamp_remappings: list of TimestampRemapping
        :param rapid_pro_key_remappings: List of rapid_pro_key -> pipeline_key remappings.
        :type rapid_pro_key_remappings: list of RapidProKeyRemapping
        :param project_start_date: When data collection started - all activation messages received before this date
                                   time will be dropped.
        :type project_start_date: datetime.datetime
        :param project_end_date: When data collection stopped - all activation messages received on or after this date
                                 time will be dropped.
        :type project_end_date: datetime.datetime
        :param filter_test_messages: Whether to filter out messages sent from the rapid_pro_test_contact_uuids
        :type filter_test_messages: bool
        :param move_ws_messages: Whether to move messages labelled as Wrong Scheme to the correct dataset.
        :type move_ws_messages: bool
        :param memory_profile_upload_url_prefix: The prefix of the GS URL to upload the memory profile log to.
                                                 This prefix will be appended by the id of the pipeline run (provided
                                                 as a command line argument), and the ".profile" file extension.
        :type memory_profile_upload_url_prefix: str
        :param data_archive_upload_url_prefix: Presumably the prefix of the URL to upload the data archive to,
                                               by analogy with `memory_profile_upload_url_prefix` - TODO confirm.
        :type data_archive_upload_url_prefix: str
        :param drive_upload: Configuration for uploading to Google Drive, or None.
                             If None, does not upload to Google Drive.
        :type drive_upload: DriveUploadPaths | None
        """
        self.pipeline_name = pipeline_name
        self.raw_data_sources = raw_data_sources
        self.phone_number_uuid_table = phone_number_uuid_table
        self.timestamp_remappings = timestamp_remappings
        self.rapid_pro_key_remappings = rapid_pro_key_remappings
        self.project_start_date = project_start_date
        self.project_end_date = project_end_date
        self.filter_test_messages = filter_test_messages
        self.move_ws_messages = move_ws_messages
        self.drive_upload = drive_upload
        self.memory_profile_upload_url_prefix = memory_profile_upload_url_prefix
        self.data_archive_upload_url_prefix = data_archive_upload_url_prefix

        self.validate()

    @classmethod
    def from_configuration_dict(cls, configuration_dict):
        """
        Builds a pipeline configuration from a dictionary, e.g. one loaded from a JSON configuration file.

        "TimestampRemappings" and "DriveUpload" are optional keys; all other keys read below are required.

        :param configuration_dict: Dictionary of configuration values.
        :type configuration_dict: dict
        :return: The (validated) pipeline configuration.
        :rtype: PipelineConfiguration
        :raises AssertionError: If a raw data source has an unknown "SourceType", or if validation fails.
        :raises KeyError: If a required key is missing from `configuration_dict`.
        """
        pipeline_name = configuration_dict["PipelineName"]

        raw_data_sources = []
        for raw_data_source in configuration_dict["RawDataSources"]:
            if raw_data_source["SourceType"] == "RapidPro":
                raw_data_sources.append(RapidProSource.from_configuration_dict(raw_data_source))
            elif raw_data_source["SourceType"] == "GCloudBucket":
                raw_data_sources.append(GCloudBucketSource.from_configuration_dict(raw_data_source))
            elif raw_data_source["SourceType"] == "RecoveryCSV":
                raw_data_sources.append(RecoveryCSVSource.from_configuration_dict(raw_data_source))
            else:
                # Raised explicitly rather than via `assert False` so that an unknown source type is still
                # rejected (instead of silently skipped) when Python runs with assertions disabled (-O).
                raise AssertionError(f"Unknown SourceType '{raw_data_source['SourceType']}'. "
                                     f"Must be 'RapidPro', 'GCloudBucket', or 'RecoveryCSV'.")

        phone_number_uuid_table = PhoneNumberUuidTable.from_configuration_dict(
            configuration_dict["PhoneNumberUuidTable"])

        timestamp_remappings = [
            TimestampRemapping.from_configuration_dict(remapping_dict)
            for remapping_dict in configuration_dict.get("TimestampRemappings", [])
        ]

        rapid_pro_key_remappings = [
            RapidProKeyRemapping.from_configuration_dict(remapping_dict)
            for remapping_dict in configuration_dict["RapidProKeyRemappings"]
        ]

        project_start_date = isoparse(configuration_dict["ProjectStartDate"])
        project_end_date = isoparse(configuration_dict["ProjectEndDate"])

        filter_test_messages = configuration_dict["FilterTestMessages"]
        move_ws_messages = configuration_dict["MoveWSMessages"]

        drive_upload_paths = None
        if "DriveUpload" in configuration_dict:
            drive_upload_paths = DriveUpload.from_configuration_dict(configuration_dict["DriveUpload"])

        memory_profile_upload_url_prefix = configuration_dict["MemoryProfileUploadURLPrefix"]
        data_archive_upload_url_prefix = configuration_dict["DataArchiveUploadURLPrefix"]

        return cls(pipeline_name, raw_data_sources, phone_number_uuid_table, timestamp_remappings,
                   rapid_pro_key_remappings, project_start_date, project_end_date, filter_test_messages,
                   move_ws_messages, memory_profile_upload_url_prefix, data_archive_upload_url_prefix,
                   drive_upload_paths)

    @classmethod
    def from_configuration_file(cls, f):
        """
        Builds a pipeline configuration from an open JSON configuration file.

        :param f: File to read the JSON configuration from.
        :type f: file-like
        :return: The (validated) pipeline configuration.
        :rtype: PipelineConfiguration
        """
        return cls.from_configuration_dict(json.load(f))

    def validate(self):
        """
        Checks that every setting held by this configuration has the expected type, and asks the nested
        configuration objects that are validated here to validate themselves.

        Called automatically at the end of `__init__`.

        :raises AssertionError: If one of the isinstance checks below fails. What the `validators` helpers
                                raise on failure is not visible here.
        """
        validators.validate_string(self.pipeline_name, "pipeline_name")

        validators.validate_list(self.raw_data_sources, "raw_data_sources")
        for i, raw_data_source in enumerate(self.raw_data_sources):
            assert isinstance(raw_data_source, RawDataSource), f"raw_data_sources[{i}] is not of type of RawDataSource"
            raw_data_source.validate()

        assert isinstance(self.phone_number_uuid_table, PhoneNumberUuidTable)
        self.phone_number_uuid_table.validate()

        # Only the container and element types are checked for the timestamp remappings, because whether
        # TimestampRemapping offers a validate() method is not visible from this class.
        validators.validate_list(self.timestamp_remappings, "timestamp_remappings")
        for i, remapping in enumerate(self.timestamp_remappings):
            assert isinstance(remapping, TimestampRemapping), \
                f"timestamp_remappings[{i}] is not of type TimestampRemapping"

        validators.validate_list(self.rapid_pro_key_remappings, "rapid_pro_key_remappings")
        for i, remapping in enumerate(self.rapid_pro_key_remappings):
            assert isinstance(remapping, RapidProKeyRemapping), \
                f"rapid_pro_key_remappings[{i}] is not of type RapidProKeyRemapping"
            remapping.validate()

        validators.validate_datetime(self.project_start_date, "project_start_date")
        validators.validate_datetime(self.project_end_date, "project_end_date")

        validators.validate_bool(self.filter_test_messages, "filter_test_messages")
        validators.validate_bool(self.move_ws_messages, "move_ws_messages")

        if self.drive_upload is not None:
            assert isinstance(self.drive_upload, DriveUpload), \
                "drive_upload is not of type DriveUpload"
            self.drive_upload.validate()

        validators.validate_string(self.memory_profile_upload_url_prefix, "memory_profile_upload_url_prefix")
        validators.validate_string(self.data_archive_upload_url_prefix, "data_archive_upload_url_prefix")
def get_rqa_coding_plans(pipeline_name):
    """
    Builds the coding plans for the OXFAM WASH datasets handled by this function.

    All six plans share the same shape: messages are timed by the "sent_on" field, raw messages are folded
    with `FoldStrategies.concatenate`, and each plan has a single MULTIPLE-mode coding configuration whose
    labels are folded with `FoldStrategies.list_of_labels` under that plan's own code scheme.

    :param pipeline_name: Name of the pipeline being run. Not consulted here; the same plans are built for
                          any value.
    :return: List of CodingPlans, in this order: s01e01, s01e02, s01e03, beneficiary consent,
             s01e03 noise handler, s01 close out.
    """
    def make_plan(field_prefix, code_scheme, coda_filename, icr_filename, analysis_file_key, ws_match_value):
        # The raw, run-id and coded field names all derive from the same prefix.
        labels_configuration = CodingConfiguration(
            coding_mode=CodingModes.MULTIPLE,
            code_scheme=code_scheme,
            coded_field=f"{field_prefix}_coded",
            analysis_file_key=analysis_file_key,
            # code_scheme is an argument of make_plan, so each plan's lambda folds with its own scheme.
            fold_strategy=lambda x, y: FoldStrategies.list_of_labels(code_scheme, x, y))

        return CodingPlan(
            raw_field=f"{field_prefix}_raw",
            time_field="sent_on",
            run_id_field=f"{field_prefix}_run_id",
            coda_filename=coda_filename,
            icr_filename=icr_filename,
            coding_configurations=[labels_configuration],
            ws_code=CodeSchemes.WS_CORRECT_DATASET.get_code_with_match_value(ws_match_value),
            raw_field_fold_strategy=FoldStrategies.concatenate)

    # One row per plan:
    # (field prefix, code scheme, Coda filename, ICR filename, analysis file key, WS match value)
    plan_specs = [
        ("rqa_s01e01", CodeSchemes.S01E01,
         "OXFAM_WASH_s01e01.json", "s01e01.csv",
         "rqa_s01e01", "OXFAM-WASH s01e01"),
        ("rqa_s01e02", CodeSchemes.S01E02,
         "OXFAM_WASH_s01e02.json", "s01e02.csv",
         "rqa_s01e02", "OXFAM-WASH s01e02"),
        ("rqa_s01e03", CodeSchemes.S01E03,
         "OXFAM_WASH_s01e03.json", "s01e03.csv",
         "rqa_s01e03", "OXFAM-WASH s01e03"),
        ("oxfam_beneficiary_consent", CodeSchemes.BENEFICIARY_CONSENT,
         "OXFAM_WASH_Beneficiary_Consent.json", "oxfam_beneficiary_consent.csv",
         "oxfam_beneficiary_consent", "OXFAM WASH Beneficiary Consent"),
        # NOTE(review): this analysis file key repeats "s01e03"; it is kept exactly as it was - confirm
        # whether the duplication is intended before changing it, since it names an output column.
        ("rqa_s01e03_noise_handler", CodeSchemes.S01E03_NOISE_HANDLER,
         "OXFAM_WASH_s01e03_Noise_Handler.json", "oxfam_wash_s01e03_noise_handler.csv",
         "rqa_s01e03_s01e03_noise_handler", "OXFAM WASH s01e03 Noise Handler"),
        ("s01_close_out", CodeSchemes.S01_CLOSE_OUT,
         "OXFAM_WASH_s01_Close_Out.json", "oxfam_wash_s01_close_out.csv",
         "s01_close_out", "OXFAM WASH S01 Close Out"),
    ]

    return [make_plan(*spec) for spec in plan_specs]