def calculate_updates(dataset, new_data, new_dframe_raw=None,
                      parent_dataset_id=None, update_id=None):
    """Append `new_data` to `dataset` and propagate the change.

    Running this concurrently with a delete
    (``controllers.Datasets.DELETE``) or an update
    (``controllers.Datasets.POST([dataset_id])``) can race, so it is
    intended to be performed asynchronously.

    :param dataset: The dataset to update.
    :param new_data: Data to update this dataset with.
    :param new_dframe_raw: DataFrame to update this dataset with.
    :param parent_dataset_id: If passed add ID as parent ID to column,
        default is None.
    :param update_id: Identifier of the pending update being applied.
    """
    __ensure_ready(dataset, update_id)

    if new_dframe_raw is None:
        new_dframe_raw = dframe_from_update(dataset, new_data)

    typed_dframe = recognize_dates(new_dframe_raw, dataset.schema)
    typed_dframe = __add_calculations(dataset, typed_dframe)

    # set parent id if provided
    if parent_dataset_id:
        typed_dframe = add_parent_column(typed_dframe, parent_dataset_id)

    dataset.append_observations(typed_dframe)
    dataset.clear_summary_stats()

    # NOTE(review): propagate presumably pushes the change to dependent
    # datasets — confirm against its definition.
    propagate(dataset, new_dframe=typed_dframe, update={'add': new_dframe_raw})

    dataset.update_complete(update_id)
def csv_file_reader(name, na_values=None, delete=False):
    """Read the CSV file at `name` into a date-recognized DataFrame.

    :param name: Path of the CSV file to read.
    :param na_values: Additional strings to treat as NA, default None.
        (Previously a mutable ``[]`` default; ``None`` and ``[]`` are
        equivalent for ``pd.read_csv`` — neither adds extra NA strings.)
    :param delete: If True, unlink the file after reading, even if
        reading raises.

    :returns: The parsed DataFrame with date columns recognized.
    """
    try:
        return recognize_dates(
            pd.read_csv(name, encoding='utf-8', na_values=na_values))
    finally:
        # best-effort cleanup of temporary upload files
        if delete:
            os.unlink(name)
def csv_file_reader(name, na_values=None, delete=False):
    """Read the CSV file at `name` into a date-recognized DataFrame.

    :param name: Path of the CSV file to read.
    :param na_values: Additional strings to treat as NA, default None.
        (Previously a mutable ``[]`` default; ``None`` and ``[]`` are
        equivalent for ``pd.read_csv`` — neither adds extra NA strings.)
    :param delete: If True, unlink the file after reading, even if
        reading raises.

    :returns: The parsed DataFrame with date columns recognized.
    """
    try:
        return recognize_dates(
            pd.read_csv(name, encoding='utf-8', na_values=na_values))
    finally:
        # best-effort cleanup of temporary upload files
        if delete:
            os.unlink(name)
def setUp(self):
    """Build a calculations-bearing dataset fixture for each test."""
    TestBase.setUp(self)
    self.dataset = Dataset()
    dataset_id = self.test_dataset_ids['good_eats_with_calculations.csv']
    self.dataset.save(dataset_id)
    raw_data = self.get_data('good_eats_with_calculations.csv')
    self.dataset.save_observations(recognize_dates(raw_data))
    self.group = None
    # decimal places used for approximate comparisons in the tests
    self.places = 5
def test_recognize_dates_from_schema(self):
    """A DATETIME simpletype in the schema forces date parsing."""
    schema = Schema({'submit_date': {SIMPLETYPE: DATETIME}})
    parsed = recognize_dates(self.dframe, schema)
    for value in parsed['submit_date']:
        self.assertTrue(isinstance(value, datetime))
def setUp(self):
    """Build a calculations-bearing dataset fixture for each test."""
    TestBase.setUp(self)
    self.dataset = Dataset()
    self.dataset.save(
        self.test_dataset_ids['good_eats_with_calculations.csv'])
    fixture_frame = self.get_data('good_eats_with_calculations.csv')
    self.dataset.save_observations(recognize_dates(fixture_frame))
    self.group = None
    # decimal places used for approximate comparisons in the tests
    self.places = 5
def test_dframe(self):
    """dframe() round-trips the saved observations."""
    dataset = Dataset.create(self.test_dataset_ids["good_eats.csv"])
    dataset.save_observations(
        recognize_dates(self.get_data("good_eats.csv")))
    result = dataset.dframe()
    self.assertTrue(isinstance(result, DataFrame))
    expected = self.get_data("good_eats.csv").reindex(
        columns=result.columns)
    self.assertTrue(all(expected.eq(result)))
    # the reserved mongo id key must not leak into the columns
    self.assertFalse(MONGO_ID_ENCODED in result.columns)
    # the date column must come back as datetimes
    self.assertTrue(isinstance(result.submit_date[0], datetime))
def test_dframe(self):
    """dframe() round-trips the saved observations.

    Removed a dead local (``records``) that materialized every
    observation via ``Observation.find`` and was never used.
    """
    dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
    dataset.save_observations(
        recognize_dates(self.get_data('good_eats.csv')))
    dframe = dataset.dframe()
    self.assertTrue(isinstance(dframe, DataFrame))
    self.assertTrue(all(self.get_data('good_eats.csv').reindex(
        columns=dframe.columns).eq(dframe)))
    columns = dframe.columns
    # ensure no reserved keys
    for key in MONGO_RESERVED_KEY_STRS:
        self.assertFalse(key in columns)
    # ensure date is converted
    self.assertTrue(isinstance(dframe.submit_date[0], datetime))
def test_dframe(self):
    """dframe() round-trips the saved observations."""
    dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
    dataset.save_observations(
        recognize_dates(self.get_data('good_eats.csv')))
    frame = dataset.dframe()
    self.assertTrue(isinstance(frame, DataFrame))
    reindexed = self.get_data('good_eats.csv').reindex(
        columns=frame.columns)
    self.assertTrue(all(reindexed.eq(frame)))
    # the reserved mongo id key must not leak into the columns
    self.assertFalse(MONGO_ID_ENCODED in frame.columns)
    # the date column must come back as datetimes
    self.assertTrue(isinstance(frame.submit_date[0], datetime))
def calculate_updates(dataset, new_data, new_dframe_raw=None,
                      parent_dataset_id=None, update_id=None):
    """Update dataset with `new_data`.

    This can result in race-conditions when:

    - deleting ``controllers.Datasets.DELETE``
    - updating ``controllers.Datasets.POST([dataset_id])``

    Therefore, perform these actions asynchronously.

    :param dataset: The dataset to update.
    :param new_data: Data to update this dataset with.
    :param new_dframe_raw: DataFrame to update this dataset with.
    :param parent_dataset_id: If passed add ID as parent ID to column,
        default is None.
    :param update_id: Identifier of the pending update being applied.
    """
    # Invalid updates are abandoned: drop the queued update record so the
    # dataset does not wait on it, then bail out without modifying data.
    if not __update_is_valid(dataset, new_dframe_raw):
        dataset.remove_pending_update(update_id)
        return

    # NOTE(review): presumably blocks/queues until the dataset is ready to
    # accept this update — confirm against __ensure_ready's definition.
    __ensure_ready(dataset, update_id)

    if new_dframe_raw is None:
        new_dframe_raw = dframe_from_update(dataset, new_data)

    new_dframe = recognize_dates(new_dframe_raw, dataset.schema)
    new_dframe = __add_calculations(dataset, new_dframe)

    # set parent id if provided
    if parent_dataset_id:
        new_dframe = add_parent_column(new_dframe, parent_dataset_id)

    dataset.append_observations(new_dframe)
    # cached summary statistics are stale after appending rows
    dataset.clear_summary_stats()

    propagate(dataset, new_dframe=new_dframe, update={'add': new_dframe_raw})

    dataset.update_complete(update_id)
def json_file_reader(content):
    """Parse a JSON string into a date-recognized DataFrame."""
    records = json.loads(content)
    return recognize_dates(pd.DataFrame(records))
def recognize_dates(self):
    """Return this frame with date-like columns parsed as dates.

    Delegates to the module-level ``recognize_dates`` helper (the bare
    name resolves to module scope, not this method), passing this
    object as the frame to convert.
    """
    return recognize_dates(self)
def test_recognize_dates(self):
    """Non-date strings must survive date recognition untouched."""
    converted = recognize_dates(self.dframe)
    for value in converted['single_letter']:
        self.assertTrue(isinstance(value, basestring))
def _verify_dataset(self, dataset_id, fixture_path):
    """Assert the stored dataset's dframe equals the pickled fixture.

    :param dataset_id: ID of the dataset to fetch and check.
    :param fixture_path: Path of the pickled fixture, relative to
        ``self.FIXTURE_PATH``.
    """
    dframe = Dataset.find_one(dataset_id).dframe()
    # use a with-block so the fixture file handle is always closed
    # (previously pickle.load(open(...)) leaked it)
    with open('%s%s' % (self.FIXTURE_PATH, fixture_path), 'rb') as fp:
        expected_dframe = recognize_dates(pickle.load(fp))
    self._check_dframes_are_equal(dframe, expected_dframe)
def test_recognize_dates_as_dates(self):
    """Date-like columns are converted to datetime values."""
    converted = recognize_dates(self.dframe)
    for value in converted['submit_date']:
        self.assertTrue(isinstance(value, datetime))
def test_count(self):
    """count() must agree with the length of the materialized dframe."""
    dataset = Dataset.create(self.test_dataset_ids["good_eats.csv"])
    observations = recognize_dates(self.get_data("good_eats.csv"))
    dataset.save_observations(observations)
    self.assertEqual(len(dataset.dframe()), dataset.count())
def _verify_dataset(self, dataset_id, fixture_path):
    """Assert the stored dataset's dframe equals the pickled fixture.

    :param dataset_id: ID of the dataset to fetch and check.
    :param fixture_path: Path of the pickled fixture, relative to
        ``self.FIXTURE_PATH``.
    """
    dframe = Dataset.find_one(dataset_id).dframe()
    # use a with-block so the fixture file handle is always closed
    # (previously pickle.load(open(...)) leaked it)
    with open('%s%s' % (self.FIXTURE_PATH, fixture_path), 'rb') as fp:
        expected_dframe = recognize_dates(pickle.load(fp))
    self._check_dframes_are_equal(dframe, expected_dframe)
def json_file_reader(content):
    """Build a date-recognized DataFrame from a JSON string."""
    parsed = json.loads(content)
    frame = pd.DataFrame(parsed)
    return recognize_dates(frame)
def test_count(self):
    """count() must agree with the length of the materialized dframe."""
    dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
    dataset.save_observations(
        recognize_dates(self.get_data('good_eats.csv')))
    expected_length = len(dataset.dframe())
    self.assertEqual(expected_length, dataset.count())
def _save_observations(self):
    """Persist the good_eats fixture as observations of this dataset."""
    fixture_frame = recognize_dates(self.get_data('good_eats.csv'))
    return Observation.save(fixture_frame, self.dataset)
def test_recognize_dates(self):
    """Non-date strings must survive date recognition untouched."""
    frame = self.get_data('soil_samples.csv')
    converted = recognize_dates(frame)
    for value in converted['single_letter']:
        self.assertTrue(isinstance(value, basestring))