示例#1
0
def test_redistribution_weights(tmpdir, tariff_data, short_form, hce):
    """Verify the shape and structure of the redistribution weights for one
    country.

    Every key of the undetermined matrix must unpack into an ``(age, sex)``
    pair whose values are allowed for the module's age group.  Every value
    must be a mapping whose keys are exactly the ``CAUSES`` entries reached
    through ``CAUSE_REDUCTION`` and whose weights sum to one.
    """
    prep = TariffPrep(tariff_data,
                      working_dir_path=tmpdir.strpath,
                      short_form=short_form,
                      options={
                          'hce': hce,
                          'free_text': True,
                          'hiv': True,
                          'malaria': True,
                          'chinese': False
                      },
                      country='USA')
    undetermined_weights = prep._get_undetermined_matrix()

    # Only the reduced cause numbers are needed, so iterate the values.
    cause_list = {prep.data_module.CAUSES[cause]
                  for cause in prep.data_module.CAUSE_REDUCTION.values()}

    # range() is a lazy sequence on Python 3 and cannot be concatenated with
    # a list directly; materialize it first so this works on 2 and 3.
    adult_ages = list(range(10, 81, 5)) + [99]

    for key, weights in undetermined_weights.items():
        age, sex = key
        assert sex in [1, 2, 3]
        if prep.AGE_GROUP == 'adult':
            assert age in adult_ages
        elif prep.AGE_GROUP == 'child':
            assert age in [0, 1, 5, 10, 99]
        elif prep.AGE_GROUP == 'neonate':
            assert age in [0, 7, 99]

        # The weight keys must match the expected causes exactly.
        assert not cause_list.symmetric_difference(weights.keys())
        assert np.allclose(sum(weights.values()), 1)
示例#2
0
def test_redistribution_weights_no_country(tmpdir, tariff_data):
    """With ``country=None`` the undetermined matrix must be an empty dict."""
    all_options = {
        'hce': True,
        'free_text': True,
        'hiv': True,
        'malaria': True,
        'chinese': False,
    }
    tariff_prep = TariffPrep(
        tariff_data,
        working_dir_path=tmpdir.strpath,
        short_form=True,
        options=all_options,
        country=None,
    )
    matrix = tariff_prep._get_undetermined_matrix()
    assert matrix == {}
示例#3
0
def test_redistribution_weights_for_countries(tmpdir, tariff_data, country):
    """Check the undetermined matrix built for a country from the list.

    The result must be a dict and must contain the ``(99, 3)`` key, the
    default used for observations with an invalid age-sex combination.
    """
    all_options = {
        'hce': True,
        'free_text': True,
        'hiv': True,
        'malaria': True,
        'chinese': False,
    }
    tariff_prep = TariffPrep(
        tariff_data,
        working_dir_path=tmpdir.strpath,
        short_form=True,
        options=all_options,
        country=country,
    )
    matrix = tariff_prep._get_undetermined_matrix()

    default_key = (99, 3)
    assert isinstance(matrix, dict)
    assert default_key in matrix
def prep(tmpdir):
    """Build a short-form ``TariffPrep`` over the adult tariff data for the
    USA, working inside the temporary directory ``tmpdir``."""
    all_options = {
        'hce': True,
        'free_text': True,
        'hiv': True,
        'malaria': True,
        'chinese': False,
    }
    tariff_prep = TariffPrep(adult_tariff_data,
                             working_dir_path=tmpdir.strpath,
                             short_form=True,
                             options=all_options,
                             country='USA')
    return tariff_prep
示例#5
0
def test_training_likelihood_ranges(tariff_data):
    """Check the per-cause likelihood lists produced from the training data.

    The likelihoods must be keyed by the sorted cause list, in that order.
    All of them must have the same length, be sorted ascending, and end at
    the number of rows in the first element returned by
    ``process_training_data`` (bound here as ``uniform_train``).
    """
    options = {
        'hce': True,
        'free_text': True,
        'hiv': True,
        'malaria': True,
        'chinese': False,
    }
    prep = TariffPrep(tariff_data, '/', True, options, 'USA')
    drop_headers = {'xs_name'}
    drop_headers.update(prep.data_module.SHORT_FORM_DROP_LIST)
    tariffs = get_tariff_matrix(prep.tariffs_filename, drop_headers,
                                prep.data_module.SPURIOUS_ASSOCIATIONS)
    prep.cause_list = sorted(tariffs.keys())
    validated = prep.read_input_file(prep.validated_filename)[1]
    train = prep.process_training_data(validated, tariffs,
                                       prep.data_module.FREQUENCIES,
                                       prep.data_module.CUTOFF_POS,
                                       [.25, .5, .75])
    uniform_train = train[0]
    likelihoods = train[4]

    # On Python 3 ``dict.keys()`` is a view that never compares equal to a
    # list, so convert it first; key order is still part of the check.
    assert list(likelihoods.keys()) == prep.cause_list
    assert len(set(map(len, likelihoods.values()))) == 1
    for likelihood in likelihoods.values():
        assert likelihood[-1] == len(uniform_train)
        assert sorted(likelihood) == likelihood
示例#6
0
def test_redistribution_weights_sum_to_one(tmpdir, tariff_data):
    """Summed within each (age, sex, iso3) group of the undetermined matrix
    file, every column must be close to one."""
    all_options = {
        'hce': True,
        'free_text': True,
        'hiv': True,
        'malaria': True,
        'chinese': False,
    }
    tariff_prep = TariffPrep(
        tariff_data,
        working_dir_path=tmpdir.strpath,
        short_form=True,
        options=all_options,
        country='USA',
    )
    matrix = pd.read_csv(tariff_prep.undetermined_matrix_filename)
    group_totals = matrix.groupby(['age', 'sex', 'iso3']).sum()
    close_to_one = group_totals.apply(np.allclose, args=(1, ))
    assert close_to_one.all()
示例#7
0
def test_redistribution_causes_match_reporting_causes(tmpdir, tariff_data):
    """The ``gs_text34`` values in the undetermined matrix file must be
    exactly the ``CAUSES`` entries reached through ``CAUSE_REDUCTION``."""
    all_options = {
        'hce': True,
        'free_text': True,
        'hiv': True,
        'malaria': True,
        'chinese': False,
    }
    prep = TariffPrep(
        tariff_data,
        working_dir_path=tmpdir.strpath,
        short_form=True,
        options=all_options,
        country=None,
    )

    # Collect every cause name that appears in the matrix file.
    undetermined_causes = set()
    with open(prep.undetermined_matrix_filename) as matrix_file:
        for row in csv.DictReader(matrix_file):
            undetermined_causes.add(row['gs_text34'])

    # Collect the cause names the tariff module reports.
    tariff_causes = set(
        prep.data_module.CAUSES[cause]
        for cause in prep.data_module.CAUSE_REDUCTION.values()
    )

    assert undetermined_causes == tariff_causes
示例#8
0
def prep():
    """Build a short-form ``TariffPrep`` over the sample tariff data for the
    USA, using ``'/'`` as the working directory."""
    all_options = {
        'hce': True,
        'free_text': True,
        'hiv': True,
        'malaria': True,
        'chinese': False,
    }
    tariff_prep = TariffPrep(sample_tariff_data, '/', True, all_options,
                             'USA')
    return tariff_prep
示例#9
0
    def run(self):
        """Run the whole analysis for ``self.input_file_path``.

        Steps, in order:

        1. Create the intermediate directory and write the cleaned headers
           file into it via ``format_headers``.
        2. Log the analysis parameters to the report logger.
        3. Detect which questionnaire form the input uses and set
           ``self.short_form`` accordingly.
        4. Build every processing stage, register the abortable ones in
           ``self._abort_list``, then run them: WHO prep (only for WHO
           input), common prep, the per-age-group stages for each group
           that has data, optional figures, and finally the output stage.

        Nothing is returned; the outcome is reported through
        ``self._complete`` with a ``CompletionStatus`` of ``FAIL`` (empty
        source file or unexpected exception), ``ABORT`` (an
        ``AbortException`` was raised) or ``DONE``.
        """
        status_logger.info('Preparing variable headers.')
        status_notifier.update({'progress': (0, 15), 'sub_progress': None})

        intermediate_dir = intermediate_dir_path(self.output_dir_path)
        figures_dir = os.path.join(self.output_dir_path, 'figures')

        # Same path as ``intermediate_dir`` above, recomputed here.
        self.make_dir(intermediate_dir_path(self.output_dir_path))

        try:
            self.format_headers(self.input_file_path, os.path.join(intermediate_dir, CLEAN_HEADERS_FILENAME))
        except StopIteration:
            # File doesn't contain data
            message = 'Source file "{}" does not contain data.'.format(self.input_file_path)
            self._complete(CompletionStatus.FAIL, message)
            warning_logger.warning(message)
            return

        report_logger.info('Analysis parameters:')
        report_logger.info('- Input file: {}'.format(self.input_file_path))
        report_logger.info('- Output folder: {}'.format(self.output_dir_path))
        report_logger.info('- Country: {}'.format(self.country))
        report_logger.info('- HIV Region: {}'.format(self.options.get('hiv', True)))
        report_logger.info('- Malaria Region: {}'.format(self.options.get('malaria', True)))
        report_logger.info('')

        # Same path that was passed to format_headers above.
        file_path = os.path.join(intermediate_dir, CLEAN_HEADERS_FILENAME)
        who_questionnaire = self.who_questionaire_test(file_path)

        if who_questionnaire:
            # WHO questionnaire input is always processed as short form.
            self.short_form = True
            form_name = 'WHO 2016 Questionnaire'

        else:
            self.short_form = self.short_form_test(file_path)
            warning_logger.debug('Detected {} form'.format(
                'short' if self.short_form else 'standard'))
            if self.short_form:
                form_name = 'PHMRC Shortened Questionnaire'
            else:
                form_name = 'PHMRC Full Questionnaire'
        report_logger.info('Detected {}'.format(form_name))

        # Build every stage up front, after short_form is known, so they can
        # all be registered in _abort_list before any of them runs.
        who_prep = WHOPrep(self.output_dir_path)
        common_prep = CommonPrep(self.output_dir_path, self.short_form)
        adult_pre_symptom = PreSymptomPrep(adult_pre_symptom_data, self.output_dir_path, self.short_form)
        adult_rules = RulesPrep(self.output_dir_path, self.short_form, common_data.ADULT, ADULT_RULES)
        adult_symptom = SymptomPrep(adult_symptom_data, self.output_dir_path, self.short_form)
        adult_results = TariffPrep(adult_tariff_data, self.output_dir_path, self.short_form, self.options, self.country)
        child_pre_symptom = PreSymptomPrep(child_pre_symptom_data, self.output_dir_path, self.short_form)
        child_rules = RulesPrep(self.output_dir_path, self.short_form, common_data.CHILD, CHILD_RULES)
        child_symptom = SymptomPrep(child_symptom_data, self.output_dir_path, self.short_form)
        child_results = TariffPrep(child_tariff_data, self.output_dir_path, self.short_form, self.options, self.country)
        neonate_pre_symptom = PreSymptomPrep(neonate_pre_symptom_data, self.output_dir_path, self.short_form)
        neonate_rules = RulesPrep(self.output_dir_path, self.short_form, common_data.NEONATE, NEONATE_RULES)
        neonate_symptom = SymptomPrep(neonate_symptom_data, self.output_dir_path, self.short_form)
        neonate_results = TariffPrep(neonate_tariff_data, self.output_dir_path, self.short_form, self.options, self.country)
        # The legacy format keeps the original layout instead of reorganizing.
        legacy = self.options.get('legacy_format', False)
        output = OutputPrep(self.output_dir_path, reorganize=not legacy,
                            keep_orig=legacy, short_form=self.short_form,
                            free_text=self.options.get('free_text', True),
                            hce=self.options.get('hce', True))
        cause_grapher = CauseGrapher(self.output_dir_path)
        csmf_grapher = CSMFGrapher(self.output_dir_path)

        # Presumably these are the stages an abort request is forwarded to;
        # verify against the code that consumes _abort_list.
        # NOTE(review): ``output`` is not registered here -- confirm that is
        # intentional.
        self._abort_list.extend([
            who_prep,
            common_prep,
            adult_pre_symptom,
            adult_rules,
            adult_symptom,
            adult_results,
            child_pre_symptom,
            child_rules,
            child_symptom,
            child_results,
            neonate_pre_symptom,
            neonate_rules,
            neonate_symptom,
            neonate_results,
            cause_grapher,
            csmf_grapher,
        ])

        try:
            if who_questionnaire:
                who_prep.run()

            # makes adult-prepped.csv, child-prepped.csv, neonate-prepped.csv
            adult_data, child_data, neonate_data = common_prep.run()

            if adult_data:
                # makes adult-presymptom.csv
                adult_pre_symptom.run()
                # makes adult-logic-rules.csv
                adult_rules.run()
                # makes adult-symptom.csv
                adult_symptom.run()
                # creates adult output files
                adult_results.run()

            if child_data:
                # makes child-presymptom.csv
                child_pre_symptom.run()
                # makes child-logic-rules.csv
                child_rules.run()
                # makes child-symptom.csv
                child_symptom.run()
                # creates child output files
                child_results.run()

            if neonate_data:
                # makes neonate-presymptom.csv
                neonate_pre_symptom.run()
                # makes neonate-logic-rules.csv
                neonate_rules.run()
                # makes neonate-symptom.csv
                neonate_symptom.run()
                # creates neonate output files
                neonate_results.run()

            # Figures are only produced when requested and when at least one
            # age group had data.
            if self.options.get('figures') and (adult_data or child_data or neonate_data):
                self.make_dir(figures_dir)
                # generate all cause graphs
                cause_grapher.run()
                # generate all csmf graphs
                csmf_grapher.run()

            output.run()

        except AbortException:
            self._complete(CompletionStatus.ABORT)
        except Exception:
            # Any other failure: print the traceback and report FAIL instead
            # of letting the exception propagate.
            traceback.print_exc()
            self._complete(CompletionStatus.FAIL)
        else:
            self._complete(CompletionStatus.DONE)