Пример #1
0
 def run_single_validation(self, grid_size, validation_date, time_unit):
     """Run a single validation of ``self.model`` for one validation date.

     Loads the dataset described by ``self.dataset_info``, keeps the rows
     selected by ``self.custom_filter`` and delegates to ``ValidateModel``.

     :param grid_size: forwarded unchanged to ``inner_loop_validation``.
     :param validation_date: date string in ``%Y-%m-%d`` format.
     :param time_unit: forwarded unchanged to ``ValidateModel``.
     :return: the result of ``ValidateModel.inner_loop_validation``.
     """
     dataset_name = self.dataset_info['name']
     dataset_path = self.dataset_info['path']
     loader = ProcessData(dataset_name, dataset_path)
     formatted_df = loader.get_formated_df()

     # Store the loader's dataset dictionary on the experiment instance.
     self.dataset_info['dict'] = loader.dataset_dict

     subset_df = ProcessData.filter_by_field(
         formatted_df,
         self.custom_filter['field'],
         self.custom_filter['value'],
     )
     validator = ValidateModel(
         subset_df, self.dataset_info['dict'], time_unit, None)
     return validator.inner_loop_validation(
         self.model,
         grid_size,
         self.train_dates,
         datetime.strptime(validation_date, '%Y-%m-%d'),
         self.metrics,
     )
Пример #2
0
    def test_hit_rate_2(self):
        """ Hit rate must be 0 when no evaluation event falls on a hotspot.

        The counting-grid prediction is built from the SUBA events of
        2018-01-01 and evaluated against the BOSA events of the same day.
        """
        csv_path = (
            "/Users/anamaria/Desktop/dev/security_project/datasets/deduplicate_siedco_09062020.csv"
        )
        raw_df = pd.read_csv(csv_path)
        data = ProcessData("SIEDCO", csv_path)
        stamped_df = data.add_timestamp(raw_df)

        # The same day is used as both bounds of the date filter.
        day = '2018-01-01'
        dataset_dict = data.dataset_dict
        single_day_df = ProcessData.filter_by_date(
            stamped_df, dataset_dict, day, day)
        train_df = ProcessData.filter_by_field(
            single_day_df, 'LOCALIDAD', 'SUBA')
        eval_df = ProcessData.filter_by_field(
            single_day_df, 'LOCALIDAD', 'BOSA')

        # Build the prediction grid from the training locality only.
        train_pts, region = ProcessData.get_time_space_points(
            train_df, data.dataset_dict)
        kernel = open_cp.naive.CountingGridKernel(
            grid_width=150, region=region)
        kernel.data = train_pts
        prediction = kernel.predict()

        coverages = [2, 4, 6, 8, 10]
        eval_pts, _ = ProcessData.get_time_space_points(
            eval_df, data.dataset_dict)
        default_rates = prediction_metrics.measure_hit_rates(
            prediction, eval_pts, coverages, 'default')
        ground_truth_rates = prediction_metrics.measure_hit_rates(
            prediction, eval_pts, coverages, 'ground_truth_coverage')

        expected_default = {2: 0.0, 4: 0.0, 6: 0.0, 8: 0.0, 10: 0.0}
        expected_ground_truth = {0.6632653061224489: 0.0}
        self.assertEqual(default_rates, expected_default)
        self.assertEqual(ground_truth_rates, expected_ground_truth)
Пример #3
0
    def train(self, df_train_subset, dataset_dict, grid_size, **kwargs):
        """Fit a SEPP model on the training rows of a single weekday.

        :param df_train_subset: training DataFrame; its ``TIME_STAMP``
            column must support the ``.dt`` accessor. It is not modified.
        :param dataset_dict: passed through to
            ``ProcessData.get_time_space_points``.
        :param grid_size: grid size handed to ``seppexp.SEPPTrainer``.
        :param kwargs: ``week_day`` (required) is the day name used to
            filter the rows; ``region`` (optional) replaces the region
            derived from the training points when it is an
            ``open_cp.data.RectangularRegion``.
        :return: the trained model, with ``data`` set to the training points.
        :raises KeyError: if ``week_day`` is missing from ``kwargs``.
        """
        # assign() returns a new frame, so the caller's DataFrame does not
        # silently gain a 'weekday' column as a side effect of training.
        df_with_weekday = df_train_subset.assign(
            weekday=df_train_subset['TIME_STAMP'].dt.day_name())
        df_weekday = ProcessData.filter_by_field(df_with_weekday,
                                                 'weekday',
                                                 kwargs['week_day'])

        train_pts, train_region = ProcessData.get_time_space_points(
            df_weekday, dataset_dict)

        # An explicit rectangular region wins over the data-derived one;
        # a missing or differently typed 'region' keeps the derived region.
        region_override = kwargs.get('region')
        if isinstance(region_override, open_cp.data.RectangularRegion):
            train_region = region_override

        trainer = seppexp.SEPPTrainer(region=train_region, grid_size=grid_size)
        trainer.data = train_pts
        trained_model = trainer.train(iterations=50, use_corrected=True)
        trained_model.data = train_pts
        return trained_model
 def test_filter_by_field_case4(self):
     # Case 4: error, the requested value does not exist in the field.
     frame = pd.read_csv(self.my_data.dataset_path)
     with self.assertRaises(ValueError):
         ProcessData.filter_by_field(frame, 'LOCALIDAD', 'NORMANDIA')
 def test_filter_by_field_case3(self):
     # Case 3: error, the requested field does not exist.
     frame = pd.read_csv(self.my_data.dataset_path)
     with self.assertRaises(ValueError):
         ProcessData.filter_by_field(frame, 'nombre', 'Pedro')
 def test_filter_by_field_case2(self):
     # Case 2: successful filter with an empty field and value; the result
     # is expected to equal the input frame.
     frame = pd.read_csv(self.my_data.dataset_path)
     unfiltered = ProcessData.filter_by_field(frame, '', '')
     self.assertEqual(unfiltered.equals(frame), True)
 def test_filter_by_field_case1(self):
     # Case 1: successful filter; the first distinct LOCALIDAD value of the
     # result must be the requested one.
     frame = pd.read_csv(self.my_data.dataset_path)
     bosa_rows = ProcessData.filter_by_field(frame, 'LOCALIDAD', 'BOSA')
     first_locality = bosa_rows.LOCALIDAD.unique()[0]
     self.assertEqual(first_locality, 'BOSA')