def list(
        self,
        start_date=None,
        end_date=None,
        tags=None,
        status=None,
        filename=None,
):
    params = dict(
        start_date=as_datetime_str(start_date),
        end_date=as_datetime_str(end_date),
        tags=tags,
        status=status,
        filename=filename,
    )
    # the list of files is split in pages - let's collect them
    while True:
        response = self.api.request(
            '%s/files' % self.api._UPLOAD_BASE_URL,
            params={k: v for k, v in params.items() if v is not None})
        data = response.json()
        if 'results' in data:
            for r in data['results']:
                yield File(
                    r['file_id'],
                    status=r.get('status'),
                    name=r.get('name'),
                    api=self.api,
                )
        if data.get('next_page_key'):
            # ask for the next page
            params['next_page_key'] = json.dumps(data['next_page_key'])
        else:
            break
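
# A minimal usage sketch for the paginated listing above. Assumption: an
# authenticated client exposes this generator (here called `api.uploads.list`,
# an illustrative name) and File carries the `file_id` and `status` attributes
# shown above.
def example_iterate_files(api):
    # the generator transparently follows `next_page_key` across pages
    for uploaded_file in api.uploads.list(status='completed', tags=['reports']):
        print(uploaded_file.file_id, uploaded_file.status)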
def list_jobs(self, start_date, end_date, status=None):
    response = self.request('/jobs', params={
        "start_date": as_datetime_str(start_date),
        "end_date": as_datetime_str(end_date),
        "status": status,
    })
    return [Job(self, job) for job in response.json()['jobs']]
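
# A hedged usage sketch: assumes `api` is the client exposing list_jobs above;
# the status value is illustrative, not confirmed by this excerpt.
def example_recent_jobs(api):
    jobs = api.list_jobs('2018-01-01', '2018-01-02', status='completed')
    for job in jobs:
        print(job)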
def json(
        self,
        start_date,
        end_date,
        fields=None,
        time_zone=None,
        frequency=None,
        having=None,
        custom_fields=None,
        filters=None,
        conditions=None,
):
    """ Use the dataset filters to request data.

    Some limitations apply:

    * granular datasets: 10,000 records
    * indicator datasets: 500 entities, timerange 1Y, lookback 1Y
    """
    api = self.api
    dataset_id = self.id
    # let's build the body, with all the defined fields
    body = {
        "start_date": as_datetime_str(start_date),
        "end_date": as_datetime_str(end_date),
        "time_zone": time_zone,
    }
    body.update(
        dict(
            frequency=frequency,
            fields=fields,
            custom_fields=custom_fields,
            filters=filters,
            conditions=conditions,
            having=having,
        ))
    response = api.request(
        endpoint="/json/{dataset_uuid}".format(dataset_uuid=dataset_id),
        method='post',
        json={k: v for k, v in body.items() if v is not None},  # remove null values
    )
    data = response.json()
    return Results(data['records'],
                   name='JSON query for %s' % dataset_id)
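
# A minimal sketch of querying a dataset through the method above. Assumptions:
# `ds` is a Dataset instance, Results is iterable over its records, and the
# field names passed here are illustrative only.
def example_dataset_query(ds):
    results = ds.json(
        start_date='2018-01-01 00:00:00',
        end_date='2018-01-02 00:00:00',
        fields=['timestamp_utc', 'rp_entity_id'],  # hypothetical field list
    )
    for record in results:
        print(record)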
def count(self, start_date, end_date, time_zone=None):
    """ Get the count of stories, analytics records and entities over a period """
    api = self.api
    dataset_id = self.id
    # let's build the body, with all the defined fields
    body = {
        "start_date": as_datetime_str(start_date),
        "end_date": as_datetime_str(end_date),
        "time_zone": time_zone,
    }
    response = api.request(
        endpoint="/datafile/{dataset_uuid}/count".format(dataset_uuid=dataset_id),
        method='post',
        json={k: v for k, v in body.items() if v is not None},
    )
    return response.json()
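
# Usage sketch for count(), assuming `ds` is a Dataset with the method above;
# the exact layout of the returned dict is whatever the /count endpoint sends
# back, which this excerpt does not define.
def example_dataset_count(ds):
    counts = ds.count('2018-01-01', '2018-01-02', time_zone='UTC')
    print(counts)  # the raw JSON response from the /count endpoint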
def request_datafile(
        self,
        start_date,
        end_date,
        output_format='csv',
        compressed=False,
        tags=None,
        notify=False,
        allow_empty=True,
        time_zone='UTC',
        fields=None,
):
    """ Request a datafile with the data in the given date range.

    This is asynchronous: it returns a Job that you can wait for.
    If allow_empty is True, it may return None, meaning that the job
    would have no data.
    """
    api = self.api
    data = {
        "start_date": as_datetime_str(start_date),
        "end_date": as_datetime_str(end_date),
        "format": output_format,
        "compressed": compressed,
        "notify": notify,
        "time_zone": time_zone,
        "tags": tags,
        "fields": fields,
    }
    try:
        response = api.request(
            endpoint="/datafile/%s" % self.id,
            json={k: v for k, v in data.items() if v is not None},  # remove null values
            method='post',
        )
    except APIException as e:
        if e.response.status_code == 400 and allow_empty:
            errors = [err['type'] for err in e.response.json()['errors']]
            if 'DatafileEmptyError' in errors:
                return None
        raise
    # an undefined job, it has just the token
    job = Job(api=self.api, token=response.json()['token'])
    return job
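
# A sketch of the asynchronous flow above: request a datafile, then wait on the
# returned Job. Assumption: Job exposes polling and download helpers; the names
# `wait_for_completion` and `save_to_file` are illustrative, not confirmed by
# this excerpt.
def example_request_datafile(ds):
    job = ds.request_datafile('2018-01-01', '2018-01-02', output_format='csv')
    if job is None:
        # allow_empty=True and the server reported a DatafileEmptyError
        print('No data in the requested range')
        return
    job.wait_for_completion()  # hypothetical polling helper
    job.save_to_file('output.csv')  # hypothetical download helper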
def json(
        self,
        start_date,
        end_date,
        fields=None,
        filters=None,
        time_zone=None,
        frequency='granular',
        having=None,
        custom_fields=None,
        conditions=None,
        product='rpa',
        product_version='1.0',
):
    # let's build the body, with all the defined fields
    body = {
        "start_date": as_datetime_str(start_date),
        "end_date": as_datetime_str(end_date),
        "time_zone": time_zone,
    }
    body.update(
        dict(
            frequency=frequency,
            fields=fields,
            custom_fields=custom_fields,
            filters=filters,
            conditions=conditions,
            having=having,
            product=product,
            product_version=product_version,
        ))
    response = self.request(
        endpoint="/json",
        method='post',
        json={k: v for k, v in body.items() if v is not None},  # remove null values
    )
    data = response.json()
    return Results(data['records'],
                   name='Ad-hoc JSON query')
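
# A hedged sketch of an ad-hoc /json query, issued on the client itself rather
# than on a saved dataset. Assumptions: `api` exposes the json() method above,
# and both the field list and the filter expression are illustrative.
def example_adhoc_query(api):
    results = api.json(
        start_date='2018-01-01 00:00:00',
        end_date='2018-01-02 00:00:00',
        fields=['timestamp_utc', 'rp_entity_id'],  # hypothetical fields
        filters={"rp_entity_id": 'D8442A'},  # hypothetical filter
    )
    for record in results:
        print(record)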
def list(
        self,
        start_date=None,
        end_date=None,
        tags=None,
        status=None,
        file_name=None,
        folder_id=None,
        page_size=50,
):
    params = dict(
        start_date=as_datetime_str(start_date),
        end_date=as_datetime_str(end_date),
        tags=tags,
        status=status,
        file_name=file_name,
        folder_id=folder_id,
        page_size=page_size,
    )
    # the list of files is split in pages - let's collect them
    offset = 0
    while True:
        response = self.api.request(
            '%s/files' % self.api._UPLOAD_BASE_URL,
            params={k: v for k, v in params.items() if v is not None})
        data = response.json()
        results = data.get('results', [])
        for r in results:
            file_params = {
                field: r.get(field)
                for field in FILE_FIELDS
            }
            yield File(api=self.api, **file_params)
        # a short (or missing) page means there are no more results
        if len(results) < page_size:
            break
        offset += page_size
        params['offset'] = offset
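
# Usage sketch for the offset-based listing above. Assumption: the client
# exposes it as `api.files.list` (an illustrative name) and File carries the
# attributes named in FILE_FIELDS.
def example_list_by_folder(api):
    # pages of `page_size` results are fetched until a short page signals the end
    for uploaded_file in api.files.list(folder_id='root', page_size=100):
        print(uploaded_file)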
def test_datetime_str():
    assert as_datetime_str('2018-01-01') == '2018-01-01'