Example #1
    def test_cleanup(self):
        self.assertFalse(os.path.exists(TEST_CACHE))

        analysis = proof.Analysis(self.stage1, cache_dir=TEST_CACHE)
        analysis.then(self.stage2)

        data = {}

        # Initial run, creates two cache files
        analysis.run(data)

        cache_files = glob(os.path.join(TEST_CACHE, '*.cache'))
        self.assertEqual(len(cache_files), 2)

        # Create false third cache file
        open(os.path.join(TEST_CACHE, 'foo.cache'), 'a').close()

        cache_files2 = glob(os.path.join(TEST_CACHE, '*.cache'))
        self.assertEqual(len(cache_files2), 3)

        # Second run, removes false cache file
        analysis.run(data)

        cache_files3 = glob(os.path.join(TEST_CACHE, '*.cache'))
        self.assertEqual(len(cache_files3), 2)
        self.assertSequenceEqual(cache_files, cache_files3)
Example #2
    def test_cache_reused(self):
        analysis = proof.Analysis(self.stage1, cache_dir=TEST_CACHE)
        analysis.then(self.stage2)

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)

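        # Rebuilding an identical analysis reuses the cached results, so neither
        # stage's counter increases on the second run.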
        analysis2 = proof.Analysis(self.stage1, cache_dir=TEST_CACHE)
        analysis2.then(self.stage2)

        analysis2.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)
Example #3
    def test_ancestor_changed(self):
        analysis = proof.Analysis(self.stage1, cache_dir=TEST_CACHE)
        noop = analysis.then(self.stage_noop)
        noop.then(self.stage2)

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)

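        # Rebuild the chain without the noop stage: stage2's ancestry changes, so
        # its cached result no longer applies and it runs again; stage1 is reused.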
        analysis2 = proof.Analysis(self.stage1, cache_dir=TEST_CACHE)
        analysis2.then(self.stage2)

        analysis2.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 2)
Example #4
    def test_same_function_twice_sequence(self):
        analysis = proof.Analysis(self.stage1, cache_dir=TEST_CACHE)
        analysis.then(self.stage2)
        analysis.then(self.stage_noop)
        analysis.then(self.stage2)

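        # The same function attached at two points in the chain is two distinct
        # stages, so stage2 executes twice in a single run.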
        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 2)
Example #5
    def test_never_cache(self):
        analysis = proof.Analysis(self.stage1, cache_dir=TEST_CACHE)
        analysis.then(self.stage_never_cache)

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage_never_cache, 1)

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage_never_cache, 2)
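
The stage_never_cache fixture is defined elsewhere in the suite; the test only checks its behaviour: stage1 is restored from cache on the second run while the never-cached stage executes again. Below is a minimal sketch of that usage pattern, assuming proof's never_cache decorator; the function names and cache directory are hypothetical:

import proof

def load(data):
    data['loaded'] = True

@proof.never_cache  # assumed decorator: the stage is re-executed on every run
def refresh(data):
    data['refreshed'] = True

analysis = proof.Analysis(load, cache_dir='.proof-example')
analysis.then(refresh)

analysis.run()  # both stages execute
analysis.run()  # load comes from cache, refresh executes again
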
Example #6
    def test_data_flow(self):
        analysis = proof.Analysis(self.stage1, cache_dir=TEST_CACHE)
        analysis.then(self.stage2)

        data = {}

        analysis.run(data)

        self.assertEqual(data, {})
        self.assertEqual(self.data_before_stage1, {})
        self.assertEqual(self.data_after_stage1, {'stage1': 5})
        self.assertEqual(self.data_before_stage2, {'stage1': 5})
        self.assertEqual(self.data_after_stage2, {'stage1': 5, 'stage2': 25})
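
The fixtures these tests reference (TEST_CACHE, self.stage1, self.stage2 and the executed_* counters) live in the suite's setUp rather than on this page. Below is a minimal sketch of what they could look like, reconstructed from the assertions in test_data_flow; the directory name, counter bookkeeping, and stage bodies are assumptions:

import os
import shutil
import unittest

import proof

TEST_CACHE = '.proof-test-cache'  # hypothetical location for the test cache


class TestAnalysisFixtures(unittest.TestCase):
    def setUp(self):
        self.executed_stage1 = 0
        self.executed_stage2 = 0

    def tearDown(self):
        # Start every test with an empty cache directory
        if os.path.exists(TEST_CACHE):
            shutil.rmtree(TEST_CACHE)

    def stage1(self, data):
        self.data_before_stage1 = dict(data)
        data['stage1'] = 5
        self.data_after_stage1 = dict(data)
        self.executed_stage1 += 1

    def stage2(self, data):
        self.data_before_stage2 = dict(data)
        data['stage2'] = data['stage1'] ** 2  # 25, matching test_data_flow
        self.data_after_stage2 = dict(data)
        self.executed_stage2 += 1
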
Example #7
    def test_ancestor_fingerprint_deleted(self):
        analysis = proof.Analysis(self.stage1, cache_dir=TEST_CACHE)
        analysis.then(self.stage2)

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)

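        # Deleting the root stage's cache file forces the entire chain to re-run.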
        os.remove(analysis._cache_path)

        analysis.run()

        self.assertEqual(self.executed_stage1, 2)
        self.assertEqual(self.executed_stage2, 2)
Example #8
def print_year_police_beat_data(data):
    data['year_police_beat'].print_table()


def print_year_data(data):
    data['groupped_year'].print_table()


def print_full_hour_data(data):
    data['full_hour'].print_table()


def print_data(data):
    data['table'].print_table(max_columns=None)


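# Build the analysis tree: each .then() adds a dependent stage that receives its
# parent's output data.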
data_loaded = proof.Analysis(load_data)
year_data = data_loaded.then(add_year_column)
groupped_data = year_data.then(year_sum_counts)
groupped_data.then(upload_killed_injured_year)

year_police_beat_data = year_data.then(year_police_beat_sum_counts)
year_police_beat_data.then(upload_killed_injured_year_police_beat)

data_loaded.then(upload_accidents)

hour_data = data_loaded.then(add_full_hour_date)
full_hour_data = hour_data.then(sum_counts_by_full_hour)
full_hour_data.then(upload_full_hour)

data_loaded.run()
Example #9
    race_groups = only_with_age.group_by('race')

    # Sub-group by age cohorts (20s, 30s, etc.)
    race_and_age_groups = race_groups.group_by(
        lambda r: '%i0s' % (r['age'] // 10),
        key_name='age_group'
    )

    # Aggregate medians for each group
    medians = race_and_age_groups.aggregate([
        ('count', agate.Count()),
        ('median_years_in_prison', agate.Median('years_in_prison'))
    ])

    # Sort the results
    sorted_groups = medians.order_by('median_years_in_prison', reverse=True)

    # Print out the results
    sorted_groups.print_table(max_rows=10)

analysis = proof.Analysis(load_data)
analysis.then(confessions)
analysis.then(median_age)
analysis.then(youth)

years_analysis = analysis.then(years_in_prison)
years_analysis.then(states)
years_analysis.then(race_and_age)

analysis.run()
Example #10
    def test_cache_unicode(self):
        analysis = proof.Analysis(self.stage_unicode, cache_dir=TEST_CACHE)
        analysis.run()

        self.assertEqual(self.executed_stage_unicode, 1)
Example #11
def main():
    data_pipeline = proof.Analysis(load_data)
    pipeline = prepare(data_pipeline)
    save_train(data_pipeline, pipeline)