def test_get_dataset(self):
    dataset_id = 'us30'
    ds_by_id = Dataset(api=self.api, id=dataset_id)
    filters = ds_by_id.filters
    assert isinstance(filters, dict)
    ds_via_api = self.api.get_dataset(dataset_id)
    ds_by_uuid = Dataset(api=self.api, uuid=dataset_id)
    assert ds_via_api.filters == ds_by_id.filters == ds_by_uuid.filters

class TestDeleteAllByName(object):
    api = RPApi()
    base_dataset = Dataset(
        name='testing_api_delete_all',
        filters={},  # a dataset without filters
    )

    def test_delete_all_by_name(self):
        dataset_name = self.base_dataset.name
        delete_all_datasets_by_name(self.api, dataset_name)
        assert len(get_datasets_by_name(self.api, dataset_name)) == 0, \
            "Seems we have datasets that should be deleted"

        ds1 = self.api.create_dataset(self.base_dataset)  # create 1...
        ds2 = self.api.create_dataset(self.base_dataset)  # create 2...
        assert len(get_datasets_by_name(self.api, dataset_name)) == 2, \
            "We should have just created 2 datasets"

        # we can also check that the new ones are among the owned datasets
        owned_dataset = self.api.list_datasets()
        assert ds1 in owned_dataset
        assert ds2 in owned_dataset

        delete_all_datasets_by_name(self.api, dataset_name)
        assert len(get_datasets_by_name(self.api, dataset_name)) == 0, \
            "Seems we have datasets that should be deleted"

def test_create_and_update(self):
    delete_all_datasets_by_name(self.api, self.dataset_name)
    filters = {"rp_entity_id": {"$in": ['AAAAAA']}}
    dataset = Dataset(
        name=self.dataset_name,
        filters=filters,  # a dataset with a filter
    )
    dataset = self.api.create_dataset(dataset)
    assert dataset.id is not None
    dataset_id = dataset.id

    # change the dataset
    new_filters = {"rp_entity_id": {"$in": ['BBBBBB']}}
    dataset.filters = new_filters
    dataset.save()

    # get the dataset again
    dataset = self.api.get_dataset(dataset_id)
    assert dataset.filters == new_filters

    new_filters = {"rp_entity_id": {"$in": ['CCCCCC']}}
    dataset.filters = new_filters
    dataset.save()

    dataset.delete()
    assert delete_all_datasets_by_name(self.api, self.dataset_name) == 0

def setup_class(cls):
    cls.ds = cls.api.create_dataset(
        Dataset(
            name='testing_encoding',
            filters={
                "rp_entity_id": '9BFEB5'  # this entity has a non-ASCII name
            },
        ))

def setup_class(cls):
    cls.ds = cls.api.create_dataset(
        Dataset(
            name='test_job_cancel',
            filters={
                "rp_entity_id": 'D8442A'
            },
        )
    )

def test_dataset_copy_updated(self):
    source_dataset = Dataset(api=self.api, id='us30')
    new_dataset = Dataset(
        api=self.api,
        name="copy of the us30 dataset",
        filters=source_dataset.filters,
        fields=['timestamp_utc', 'rp_entity_id', 'avg_sentiment'],
        custom_fields=[{
            "avg_sentiment": {
                "avg": {
                    "field": "EVENT_SENTIMENT_SCORE",
                }
            }
        }],
        frequency='daily',
        tags=['copy', 'test'])
    new_dataset.save()
    new_dataset.delete()

def create_dataset(self, dataset):
    # be sure to create a copy of the dataset definition
    new_dataset_data = dataset.as_dict()
    new_dataset = Dataset(api=self, **new_dataset_data)
    if 'uuid' in new_dataset_data:
        del new_dataset['uuid']  # the copy must not keep the source dataset's uuid
    new_dataset.save()
    dataset_id = new_dataset.id
    logger.info("Created dataset %s" % dataset_id)
    return new_dataset

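# A minimal usage sketch of the create_dataset method above (not part of the
# original snippet): it assumes RP_API_KEY is configured for RPApi() and uses
# an illustrative name and filter; the returned Dataset gets its own new id.
from ravenpackapi import RPApi, Dataset

api = RPApi()
new_ds = api.create_dataset(Dataset(name='my dataset copy',
                                    filters={"relevance": {"$gte": 90}}))
print(new_ds.id)
new_ds.delete()  # clean up the example dataset
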
def test_granular_dataset(self):
    self.api.log_curl_commands = True
    granular_dataset = Dataset(
        name='Test-granular-dataset',
        filters={"$and": [{"rp_entity_id": {"$in": ["D8442A"]}},
                          {"relevance": 90}]},
    )
    granular_dataset = self.api.create_dataset(granular_dataset)
    try:
        granular_dataset.json('2018-01-01 00:00', '2018-01-02 00:00')
    finally:
        granular_dataset.delete()

def daily_download(dataset_id):
    end_date = datetime.datetime.utcnow().replace(hour=0, minute=0,
                                                  second=0, microsecond=0)
    start_date = end_date - datetime.timedelta(days=1)
    print("Date range: %s - %s" % (start_date, end_date))
    dataset = Dataset(api=api, id=dataset_id)
    job = dataset.request_datafile(start_date, end_date)
    print("Waiting for the job to complete")
    job.wait_for_completion()
    output_filename = "dailydata-%s.csv" % start_date.strftime("%Y-%m-%d")
    job.save_to_file(output_filename)
    print("Daily download saved to:", output_filename)

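# A usage sketch for daily_download (an assumption, not part of the original
# snippet): the function relies on a module-level `api` and on a dataset id you
# own; 'us30' is just the preset id used elsewhere in these examples.
from ravenpackapi import RPApi

api = RPApi()  # reads RP_API_KEY from the environment
daily_download('us30')  # writes dailydata-YYYY-MM-DD.csv for yesterday
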
def get_counts_universe(universe, ltgt, start_date, end_date, thresh, filename):
    # label the indicator column after the sentiment direction being counted
    label = "count_pos"
    if ltgt == "lt":
        label = "count_neg"
    global api
    custom_dataset = Dataset(
        name="Test set",
        frequency="daily",
        filters={
            "and": [
                {"rp_entity_id": {"in": universe}},
                {"event_relevance": {"gte": thresh}},
                {"event_sentiment_score": {ltgt: 0.5}},
            ]
        },
        custom_fields=[
            {label: {"count": {"field": "rp_entity_id"}}},
        ])
    ds = api.create_dataset(custom_dataset)
    req_data = ds.request_datafile(start_date=start_date, end_date=end_date)
    fname = filename + ".csv"
    with open(fname, "w") as fp:
        req_data.save_to_file(filename=fp.name)
    print("Done importing data for {}".format(fname))

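# A hypothetical call of get_counts_universe (illustrative values only): it
# relies on a module-level `api`; the entity ids are the ones used in the other
# examples, "gte" selects the positive-count label and "lt" the negative one.
from ravenpackapi import RPApi

api = RPApi()
get_counts_universe(universe=["D8442A", "228D42"],
                    ltgt="gte",
                    start_date="2018-01-01",
                    end_date="2018-01-31",
                    thresh=90,
                    filename="counts_positive")  # writes counts_positive.csv
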
def test_simple_update(self):
    filters = {"rp_entity_id": {"$in": ['D8442A']}}
    ds = self.api.create_dataset(
        Dataset(
            name=self.dataset_name,
            filters=filters,  # a dataset with a filter
        )
    )
    assert ds._lazy_retrieve_on_get is False
    dataset_id = ds.id

    ds = self.api.get_dataset(dataset_id)  # retrieve the dataset
    assert ds._lazy_retrieve_on_get is True  # it still has to be lazily loaded here
    ds.filters = {"rp_entity_id": {"$in": ["228D42"]}}  # update the dataset
    ds.save()

    for r in ds.json('2019-01-01', '2019-01-02'):
        assert r['rp_entity_id'] == '228D42', \
            "Expecting entity to be 228D42 - got %s" % r['rp_entity_id']
        break

def test_indicator_dataset(self):
    indicator_dataset = Dataset(
        name='Test-indicator-dataset',
        filters={"$and": [{"rp_entity_id": {"$in": ["D8442A"]}}]},
        fields=[{"average": {"avg": {"field": "EVENT_SENTIMENT_SCORE"}}}],
        frequency='daily',
    )
    indicator_dataset = self.api.create_dataset(indicator_dataset)
    try:
        # ask the indicator dataset for its data
        response = indicator_dataset.json('2018-01-01 00:00', '2018-01-02 00:00')
        assert len(response) == 2  # we should get 2 rows
        assert {r['rp_entity_id'] for r in response} == {'D8442A', 'ROLLUP'}

        # do a request overriding fields and frequency to see the underlying data
        response = indicator_dataset.json(
            '2018-01-01 00:00', '2018-01-02 00:00',
            fields=['rp_story_id', 'rp_entity_id'],
            frequency='granular')
        assert len(response) > 200, "We should have many granular analytics rows"
        assert {r['rp_entity_id'] for r in response} == {'D8442A'}, \
            "All rows should be D8442A"
    finally:
        indicator_dataset.delete()

def test_create_and_delete(self):
    # the test dataset may already be there, so delete it first;
    # we can have multiple datasets with the same name, so delete all of them
    delete_all_datasets_by_name(self.api, self.dataset_name)

    # create the dataset
    filters = {"rp_entity_id": {"$in": ['D8442A']}}
    dataset = Dataset(
        name=self.dataset_name,
        filters=filters,  # a dataset with a filter
    )
    new_dataset = self.api.create_dataset(dataset)
    assert new_dataset.filters == dataset.filters, "Created dataset filters are not as expected"
    assert new_dataset.id is not None, "We should have a dataset id"

    owned_dataset = self.api.list_datasets()
    assert new_dataset.id in owned_dataset, "We should own the new dataset"

    new_dataset.delete()
    owned_dataset = self.api.list_datasets()
    assert new_dataset.id not in owned_dataset, "The new dataset should be deleted"

Dataset(
    **{
        "product": "rpa",
        "product_version": "1.0",
        "name": "Events in UK - example",
        "fields": [
            "timestamp_utc", "rp_story_id", "rp_entity_id", "entity_type",
            "entity_name", "country_code", "relevance",
            "event_sentiment_score", "topic", "group", "headline"
        ],
        "filters": {
            "$and": [
                {"relevance": {"$gte": 90}},
                {"country_code": {"$in": ["GB"]}},
                {"event_sentiment_score": {"$nbetween": [-0.5, 0.5]}},
            ]
        },
        "frequency": "granular",
    })

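# A sketch of how a definition like the one above is typically used in these
# examples (an assumption, not part of the original fragment): pass it to
# api.create_dataset(...) and query a date range, as the other snippets do.
# The simplified filters and variable name are illustrative.
from ravenpackapi import RPApi, Dataset

api = RPApi()
uk_events = api.create_dataset(Dataset(**{
    "name": "Events in UK - example",
    "filters": {"country_code": {"$in": ["GB"]}},
    "frequency": "granular",
}))
data = uk_events.json('2018-01-01 00:00', '2018-01-02 00:00')
uk_events.delete()
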
from ravenpackapi import RPApi, Dataset
import logging

logging.basicConfig(level=logging.INFO)

# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

# get the us30 dataset (its filters contain the top 30 US companies)
us30 = Dataset(api=api, id='us30')
print(us30.filters)

# create a new dataset with modified filters and fields:
# the filters are an aggregation of the us30 ones with some additional rules
new_filters = {
    "$and": [
        us30.filters,
        {"relevance": {"$gte": 90}},
        {"event_similarity_days": {"$gte": 1}},
    ]
}
new_fields = [{
    "daily_average_ess_1d": {
        "avg": {
            "field": "EVENT_SENTIMENT_SCORE",

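# A hedged continuation sketch (the example above is truncated mid-definition):
# assuming new_fields is completed as an "avg" indicator field, the pattern in
# the other examples would be to build and save a Dataset from those parts.
# The dataset name is illustrative.
new_dataset = Dataset(
    api=api,
    name="us30 daily average sentiment",
    filters=new_filters,
    fields=new_fields,
    frequency='daily',
)
new_dataset.save()
print(new_dataset.id)
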
import logging

from ravenpackapi import RPApi, Dataset
from ravenpackapi.models.job import Job

logging.basicConfig(level=logging.INFO)

# initialize the API (here we use the RP_API_KEY in os.environ)
api = RPApi()

dataset_id = None  # put a dataset_id here if you already have one
if dataset_id is None:
    dataset = Dataset(api=api,
                      filters={},
                      name='Average sentiment',
                      frequency='daily',
                      fields=[{
                          'average_ess': {
                              'avg': {'field': 'EVENT_SENTIMENT_SCORE'}
                          }
                      }])
    dataset_id = dataset.save()
else:
    dataset = api.get_dataset(dataset_id)

# job = Job(api=api,
#           token='xxx')  # if you already have a job you can use this
# ... or request a new one
job = dataset.request_datafile(
    start_date='2018-01-01 00:00:00',

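# A possible continuation (the snippet above is truncated at request_datafile):
# following the pattern used in the other examples, the request would be given
# an end_date, then the job is awaited and saved; the dates and filename here
# are illustrative.
job = dataset.request_datafile(
    start_date='2018-01-01 00:00:00',
    end_date='2018-01-02 00:00:00',
)
job.wait_for_completion()
job.save_to_file('average_sentiment.csv')
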
def test_invalid_additional_fields(self):
    dt = '2020-01-01'
    with pytest.raises(ValueError):
        Dataset(id='us30', creation_time=dt, last_modified=dt, invalid_field=1)

def test_valid_uuid(self):
    d = Dataset(uuid='us30')
    assert d.id == 'us30'

def test_valid_additional_fields(self):
    dt = '2020-01-01'
    d = Dataset(id='us30', creation_time=dt, last_modified=dt)
    assert d.id == 'us30'
    assert d.creation_time == d.last_modified == dt

def get_dataset(self, dataset_id):
    return Dataset(
        api=self,
        uuid=dataset_id,
    )

from ravenpackapi import RPApi, Dataset
from ravenpackapi.utils.helpers import delete_all_datasets_by_name

api = RPApi()

# Begin by creating a dataset with your desired filters (see the RPA user guide for the syntax).
# You can then add functions (https://app.ravenpack.com/api-documentation/#indicator-syntax).
# Alternatively, you can create the dataset via the query builder and just use its dataset_uuid.
dataset = Dataset(api,
                  name='My Indicator dataset',
                  filters={"relevance": {"$gt": 90}},
                  frequency='daily',
                  fields=[{"avg_1d": {"avg": {"field": "EVENT_SENTIMENT_SCORE",
                                              "lookback": 1, "mode": "granular"}}},
                          {"avg_7d": {"avg": {"field": "avg_1d",
                                              "lookback": 1, "mode": "granular"}}},
                          {"buzz_365d": {"buzz": {"field": "RP_ENTITY_ID",
                                                  "lookback": 365}}},
                          {"newsvolume_1d": {"count": {"field": "RP_ENTITY_ID",
                                                       "lookback": 1}}},
                          {"newsvolume_365d": {"avg": {"field": "newsvolume_1d",
                                                       "lookback": 365, "mode": "granular"}}}])
dataset.save()

# you can also change the fields (remember to save afterwards)
dataset.fields = [
    {"avg": {"avg": {"field": "EVENT_SENTIMENT_SCORE", "lookback": 365}}},
]
dataset.save()

# Following this, you can generate a datafile for your desired date range
job = dataset.request_datafile(