def get_info(self) -> StreamResponse:
    """Fetch the details of the current stream from the API.

    Returns
    -------
    StreamResponse built from the ``data`` entry of the JSON response and
    its optional ``links`` entry (``None`` when absent).

    Raises
    ------
    const.UNDEFINED_STREAM_ID_ERROR: when ``self.stream_id`` is empty
    RuntimeError: when the API answers with any status other than 200;
        the message is the body text of the response
    """
    if not self.stream_id:
        raise const.UNDEFINED_STREAM_ID_ERROR

    stream_uri = f'{self.stream_url}/{self.stream_id}'
    response = helper.api_send_request(
        method='GET',
        endpoint_url=stream_uri,
        headers=self.stream_user.get_authentication_headers(),
    )
    if response.status_code != 200:
        raise RuntimeError(response.text)

    body = response.json()
    return StreamResponse(data=body['data'], links=body.get('links', None))
def download_file(self, endpoint_url, local_path):
    """Download ``endpoint_url`` and store the response body in ``local_path``.

    Parameters
    ----------
    endpoint_url : str
        URL requested with a GET, authenticated with the user key.
    local_path : str
        Path of the file to write. It is opened in binary write mode, so
        existing content is replaced.

    Returns
    -------
    The response object returned by ``helper.api_send_request``. The HTTP
    status is not checked here: the body is written whatever the status,
    and it is up to the caller to inspect the returned response.
    """
    headers_dict = {'user-key': self.api_user.api_key}
    response = helper.api_send_request(method='GET',
                                       endpoint_url=endpoint_url,
                                       headers=headers_dict)
    # The context manager closes (and flushes) the handle even when the
    # write fails; the previous bare open().write() left that to the GC.
    with open(local_path, 'wb') as local_file:
        local_file.write(response.content)
    return response
def get_extractions(self) -> pd.DataFrame:
    """List the extractions of the account.

    Returns
    -------
    Dataframe with one row per entry of the response ``data`` list, each
    entry passed through ``flatten_dict`` first.

    Raises
    ------
    - ValueError when the API answers 403 (API key missing or inactive)
    - RuntimeError when the API answers any other non-200 status
    """
    response = api_send_request(
        method='GET',
        endpoint_url=f'{const.API_HOST}{const.API_EXTRACTIONS_BASEPATH}',
        headers={'user-key': self.api_key},
    )

    status = response.status_code
    if status == 403:
        raise ValueError('Factiva API-Key does not exist or inactive.')
    if status != 200:
        raise RuntimeError(
            f'Unexpected API Error with message: {response.text}')

    rows = list(map(flatten_dict, response.json()['data']))
    return pd.DataFrame(rows)
def delete(self, headers=None) -> bool:
    """Delete this subscription from its stream.

    Parameters
    ----------
    headers: dict
        Request headers carrying the token/access key used for
        authorization.

    Returns
    -------
    bool: True when the API confirms the deletion with a 200 status.

    Raises
    ------
    RuntimeError: when the API answers with any status other than 200
    """
    subscription_uri = f'{self.url}/{self.stream_id}/subscriptions/{self.id}'
    response = helper.api_send_request(
        method='DELETE',
        endpoint_url=subscription_uri,
        headers=headers,
    )
    if response.status_code != 200:
        raise RuntimeError('Unexpected API response')
    return True
def download_file(self, endpoint_url: str, download_path: str):
    """Download a job file and store it at ``download_path``.

    Parameters
    ----------
    endpoint_url: str
        URL to download the file from
    download_path: str
        Path where the downloaded content is written (binary mode)

    Returns
    -------
    Boolean : True once the file has been written.

    Raises
    ------
    - RuntimeError when the API answers with a status other than 200;
      nothing is written in that case
    """
    response = helper.api_send_request(
        method='GET',
        endpoint_url=endpoint_url,
        headers={'user-key': self.api_user.api_key},
    )
    if response.status_code != 200:
        raise RuntimeError(
            f'API request returned an unexpected HTTP status, with content [{response.text}]'
        )

    with open(download_path, 'wb') as target_file:
        target_file.write(response.content)
    return True
def _create_by_snapshot_id(self) -> StreamResponse:
    """Create a stream from an existing snapshot.

    Posts to the ``streams`` endpoint of the snapshot identified by
    ``self.snapshot_id``. On success the new stream id is stored in
    ``self.stream_id`` and ``self.create_default_subscription`` is called
    with the decoded response.

    Returns
    -------
    StreamResponse built from the ``data`` entry of the JSON response and
    its optional ``links`` entry.

    Raises
    ------
    ValueError: when ``self.snapshot_id`` is undefined
    const.UNEXPECTED_HTTP_ERROR: when the API answers with any status
        other than 201
    """
    # The leftover debug print of the snapshot id was removed: library code
    # should not write to stdout on every call.
    if not self.snapshot_id:
        raise ValueError('create fails: snaphot_id undefined')

    headers = self.stream_user.get_authentication_headers()
    headers['Content-Type'] = 'application/json'
    uri = f'{const.API_HOST}{const.API_SNAPSHOTS_BASEPATH}/{self.snapshot_id}/streams'
    response = helper.api_send_request(
        method='POST',
        endpoint_url=uri,
        headers=headers,
    )
    if response.status_code == 201:
        response = response.json()
        self.stream_id = response['data']['id']
        self.create_default_subscription(response)
        return StreamResponse(data=response['data'],
                              links=response.get('links', None))

    raise const.UNEXPECTED_HTTP_ERROR
def get_job_results(self, endpoint_url):
    """Send an authenticated GET to ``endpoint_url`` and return the response.

    The request carries the user key and a JSON content type. The response
    is returned untouched; status handling is left to the caller.
    """
    return helper.api_send_request(
        method='GET',
        endpoint_url=endpoint_url,
        headers={
            'user-key': self.api_user.api_key,
            'Content-Type': 'application/json',
        },
    )
def get_multiple_companies(self, code_type, companies_codes):
    """Request information about several companies at once.

    Parameters
    ----------
    code_type : str
        Code type used to identify the companies. E.g. isin, ticker.
    companies_codes : list
        Company codes (each a str) to request information about

    Returns
    -------
    DataFrame built from the ``successes`` records of the response

    Raises
    ------
    RuntimeError: When the API answers with a status other than 200 or 207

    Any type mismatch in the arguments is reported by
    ``helper.validate_type`` before the request is sent.

    Examples
    --------
    Get multiple companies data using the code type 'isin' and a company
    codes list

    >>> taxonomy = Taxonomy()
    >>> companies_data = taxonomy.get_multiple_companies('isin', ['ABC3E53433100', 'XYZ233341067', 'MN943181045'])
    >>> print(companies_data)
                 id   fcode      common_name
    0  ABC3E5343310  MCABST  M**************
    1  XYZ233341067   AXYZC    A************
    2  MN9431810453     MNM     M***********
    """
    helper.validate_type(code_type, str,
                         'Unexpected value: code_type must be str')
    helper.validate_type(companies_codes, list,
                         'Unexpected value: companies must be list')
    for code in companies_codes:
        helper.validate_type(
            code, str,
            'Unexpected value: each company in companies must be str')

    auth_headers = {'user-key': self.api_user.api_key}
    request_body = {"data": {"attributes": {"ids": companies_codes}}}
    url = f'{const.API_HOST}{const.API_SNAPSHOTS_COMPANIES_BASEPATH}/{code_type}'
    response = helper.api_send_request(method='POST',
                                       endpoint_url=url,
                                       headers=auth_headers,
                                       payload=request_body)

    if response.status_code not in (200, 207):
        raise RuntimeError(
            f'API Request returned an unexpected HTTP status with message: {response.text}'
        )

    successes = response.json()['data']['attributes']['successes']
    return pd.DataFrame.from_records(successes)
def submit_job(self, endpoint_url, payload):
    """POST ``payload`` to ``endpoint_url`` and return the response.

    The request carries the user key and a JSON content type. The response
    is returned untouched; status handling is left to the caller.
    """
    return helper.api_send_request(
        method='POST',
        endpoint_url=endpoint_url,
        headers={
            'user-key': self.api_user.api_key,
            'Content-Type': 'application/json',
        },
        payload=payload,
    )
def get_category_codes(self, category):
    """Request the codes available in a taxonomy category.

    Parameters
    ----------
    category : str
        Name of the taxonomy category to request the codes from

    Returns
    -------
    Dataframe parsed from the CSV body returned by the API

    Raises
    ------
    RuntimeError: When the API answers with a status other than 200

    A ``category`` that is not a str is reported by
    ``helper.validate_type`` before the request is sent.

    Examples
    --------
    Getting the codes for the 'industries' category

    >>> taxonomy = Taxonomy()
    >>> industry_codes = taxonomy.get_category_codes('industries')
    >>> print(industry_codes)
         code                description
    0  i25121             Petrochemicals
    1  i14001         Petroleum Refining
    2    i257            Pharmaceuticals
    3  iphrws  Pharmaceuticals Wholesale
    4    i643     Pharmacies/Drug Stores
    """
    helper.validate_type(
        category, str, 'Unexpected value: category value must be string')

    # The taxonomy endpoint is always queried in its CSV flavour.
    response_format = 'csv'
    auth_headers = {'user-key': self.api_user.api_key}
    url = f'{const.API_HOST}{const.API_SNAPSHOTS_TAXONOMY_BASEPATH}/{category}/{response_format}'
    response = helper.api_send_request(method='GET',
                                       endpoint_url=url,
                                       headers=auth_headers)

    if response.status_code != 200:
        raise RuntimeError('API Request returned an unexpected HTTP Status')

    csv_text = response.content.decode()
    return pd.read_csv(StringIO(csv_text))
def submit_job(self, payload=None) -> bool:
    """Submit a new job to the endpoint given by ``self.get_endpoint_url()``.

    Records the submission time in ``self.submitted_datetime`` before the
    request is sent. On a 201 response, stores the job id, the job state
    and the job link on the instance.

    Parameters
    ----------
    payload: dict or str, Optional
        Payload required to create the new job, usually the query. It is
        passed unchanged to ``helper.api_send_request``.

    Returns
    -------
    Boolean: True if the job was created. An Exception otherwise.

    Raises
    ------
    - ValueError when the API answers 400 (invalid query)
    - RuntimeError when the API answers any status other than 201 or 400
    """
    self.submitted_datetime = datetime.now()

    response = helper.api_send_request(
        method='POST',
        endpoint_url=self.get_endpoint_url(),
        headers={
            'user-key': self.api_user.api_key,
            'Content-Type': 'application/json',
        },
        payload=payload,
    )

    status = response.status_code
    if status == 400:
        raise ValueError(f'Invalid Query [{response.text}]')
    if status != 201:
        raise RuntimeError(
            f'API request returned an unexpected HTTP status, with content [{response.text}]'
        )

    job_info = response.json()
    self.job_id = self.get_job_id(job_info)
    self.job_state = job_info['data']['attributes']['current_state']
    self.link = job_info['links']['self']
    return True
def get_single_company(self, code_type, company_code):
    """Request information about one company.

    Parameters
    ----------
    code_type : str
        Code type used to identify the company. E.g. isin, ticker.
    company_code : str
        The company code

    Returns
    -------
    Single-row DataFrame built from the ``attributes`` of the response

    Raises
    ------
    RuntimeError: When the API answers with a status other than 200

    Any type mismatch in the arguments is reported by
    ``helper.validate_type`` before the request is sent.

    Examples
    --------
    Get the company data using the code type 'isin' and the company code
    'ABCNMST00394'

    >>> taxonomy = Taxonomy()
    >>> company_data = taxonomy.get_single_company('isin', 'ABCNMST00394')
    >>> print(company_data)
                 id  fcode           common_name
    0  ABCNMST00394  ABCYT  Systemy Company S.A.
    """
    helper.validate_type(code_type, str,
                         'Unexpected value: code_type must be str')
    helper.validate_type(company_code, str,
                         'Unexpected value: company must be str')

    auth_headers = {'user-key': self.api_user.api_key}
    url = f'{const.API_HOST}{const.API_SNAPSHOTS_COMPANIES_BASEPATH}/{code_type}/{company_code}'
    response = helper.api_send_request(method='GET',
                                       endpoint_url=url,
                                       headers=auth_headers)

    if response.status_code != 200:
        raise RuntimeError('API Request returned an unexpected HTTP status')

    attributes = response.json()['data']['attributes']
    return pd.DataFrame.from_records([attributes])
def create(self, headers=None):
    """Create a new subscription on the stream given by ``self.stream_id``.

    On success the id and type of the entry at ``self.SUBSCRIPTION_IDX`` in
    the response data are stored in ``self.id`` and
    ``self.subscription_type``.

    Parameters
    ----------
    headers: dict
        Request headers carrying the token/access key used for
        authorization.

    Returns
    -------
    The ``data`` entry of the JSON response.

    Raises
    ------
    ValueError: when ``self.stream_id`` is undefined
    RuntimeError: when the API answers with any status other than 201
    """
    if not self.stream_id:
        raise ValueError(
            ''' stream_id is not defined, it must be defined for creating a subscription '''
        )

    subscriptions_uri = f'{self.url}/{self.stream_id}/subscriptions'
    response = helper.api_send_request(
        method='POST',
        endpoint_url=subscriptions_uri,
        headers=headers,
    )
    if response.status_code != 201:
        raise RuntimeError('Unexpected API response')

    data = response.json()['data']
    created = data[self.SUBSCRIPTION_IDX]
    self.id = created['id']
    self.subscription_type = data[self.SUBSCRIPTION_IDX]['type']
    return data
def _create_by_query(self) -> StreamResponse:
    """Create a stream from the query held in ``self.query``.

    Posts the ``query`` part of the base query to ``self.stream_url``. On
    success the new stream id is stored in ``self.stream_id`` and
    ``self.create_default_subscription`` is called with the decoded
    response.

    Returns
    -------
    StreamResponse built from the ``data`` entry of the JSON response and
    its optional ``links`` entry.

    Raises
    ------
    ValueError: When ``self.query`` is undefined
    const.UNEXPECTED_HTTP_ERROR: When the API answers with any status
        other than 201
    """
    if not self.query:
        raise ValueError('Streams query undefined in Create by query')

    query_attributes = self.query.get_base_query()['query']
    request_body = {
        "data": {
            "attributes": query_attributes,
            "type": "stream",
        }
    }

    request_headers = self.stream_user.get_authentication_headers()
    request_headers['Content-Type'] = 'application/json'
    response = helper.api_send_request(
        method='POST',
        endpoint_url=self.stream_url,
        headers=request_headers,
        payload=request_body,
    )
    if response.status_code != 201:
        raise const.UNEXPECTED_HTTP_ERROR

    body = response.json()
    self.stream_id = body['data']['id']
    self.create_default_subscription(body)
    return StreamResponse(data=body['data'], links=body.get('links', None))
def get_job_results(self) -> bool:
    """Refresh the job state from the API using the job link.

    Requests ``self.link`` and stores the reported state in
    ``self.job_state``. When that state equals ``const.API_JOB_DONE_STATE``
    the decoded response is handed to ``self.set_job_data``.

    Returns
    -------
    Boolean : True if the status request was successful. An Exception
    otherwise.

    Raises
    ------
    - RuntimeError when ``self.link`` is an empty string (job not
      submitted or job id not set), when the API answers 404 (job not
      found), or when it answers any other non-200 status.
    """
    if self.link == '':
        raise RuntimeError(
            'Job has not yet been submitted or Job ID was not set')

    response = helper.api_send_request(
        method='GET',
        endpoint_url=self.link,
        headers={
            'user-key': self.api_user.api_key,
            'Content-Type': 'application/json',
        },
    )

    status = response.status_code
    if status == 404:
        raise RuntimeError('Job ID does not exist.')
    if status != 200:
        raise RuntimeError(
            f'API request returned an unexpected HTTP status, with content [{response.text}]'
        )

    job_info = response.json()
    self.job_state = job_info['data']['attributes']['current_state']
    if self.job_state == const.API_JOB_DONE_STATE:
        self.set_job_data(job_info)
    return True
def get_streams(self) -> list:
    """Obtain the streams of the user.

    Requests the default streams endpoint with the user key and wraps every
    entry of the response ``data`` list in a ``StreamResponse``.

    Returns
    -------
    list of StreamResponse, one per stream returned by the API. (The
    previous ``pd.DataFrame`` annotation did not match what is returned.)

    Raises
    ------
    AttributeError: When the 200 response cannot be decoded or does not
        have the expected structure; the underlying error is chained as
        ``__cause__``
    ValueError: When the API answers 403 (API key missing or inactive)
    RuntimeError: When the API answers any other non-200 status
    """
    request_headers = {'user-key': self.api_key}
    response = api_send_request(
        method="GET",
        endpoint_url=self.__API_ENDPOINT_STREAM_URL,
        headers=request_headers)

    if response.status_code == 200:
        try:
            response_data = response.json()
            return [
                StreamResponse(data=stream, links=stream.get('links', None))
                for stream in response_data['data']
            ]
        except Exception as error:
            # Keep the public AttributeError contract but chain the cause so
            # the real parsing failure stays visible in the traceback.
            raise AttributeError(
                'Unexpected Get Streams API Response.') from error
    elif response.status_code == 403:
        raise ValueError('Factiva API-Key does not exist or inactive.')
    else:
        raise RuntimeError('Unexpected Get Streams API Error')
def fetch_credentials(self) -> dict:
    """Fetch the streaming credentials of the account.

    Builds the request from the current authentication headers and URI
    context, calls the account stream-credentials endpoint and decodes the
    ``streaming_credentials`` JSON string found in the response.

    Returns
    -------
    The decoded streaming credentials (result of ``json.loads`` on the
    ``streaming_credentials`` attribute; presumably a JSON object).

    Raises
    ------
    RuntimeError: When the API answers 401 (authentication failed)
    ValueError: When the response has no streaming credentials entry; the
        underlying ``KeyError`` is chained as ``__cause__``
    """
    headers = self.get_authentication_headers()
    uri = self.get_uri_context()
    response = api_send_request(
        method="GET",
        endpoint_url='{}{}'.format(
            uri, self.__API_ACCOUNT_STREAM_CREDENTIALS_BASEPATH),
        headers=headers)

    if response.status_code == 401:
        # NOTE(review): this message embeds the authentication headers, so
        # the credentials can end up in logs/tracebacks. Left unchanged to
        # preserve the existing message; consider redacting.
        message = ''' Extraction API authentication failed for given credentials header:{} '''.format(headers)
        raise RuntimeError(message)

    try:
        streaming_credentials_string = response.json(
        )['data']['attributes']['streaming_credentials']
    except KeyError as error:
        raise ValueError(''' Unable to find streaming credentials for given account ''') from error

    return json.loads(streaming_credentials_string)
def _check_stream_status(self):
    """Check whether the stream has reached the exceeded status.

    Requests ``self.stream_id_uri`` and, when the reported ``job_status``
    equals ``const.DOC_COUNT_EXCEEDED``, goes on to check the account
    status through ``self._check_account_status``.

    Raises
    ------
    RuntimeError: When the API answers with a status other than 200
    """
    auth_headers = self.stream_user.get_authentication_headers()
    response = helper.api_send_request(
        method='GET',
        endpoint_url=self.stream_id_uri,
        headers=auth_headers,
    )
    if response.status_code != 200:
        raise RuntimeError('HTTP API Response unexpected')

    stream_info = response.json()
    current_status = stream_info['data']['attributes']['job_status']
    if current_status == const.DOC_COUNT_EXCEEDED:
        self._check_account_status()
def get_categories(self):
    """Request the list of available taxonomy categories.

    Returns
    -------
    List with the ``name`` attribute of every entry in the response data.

    Raises
    ------
    RuntimeError: When the API answers with a status other than 200

    Examples
    --------
    This method is called within the __init__ method, so the categories can
    be accessed as is.

    >>> taxonomy = Taxonomy()
    >>> print(taxonomy.categories)
    ['news_subjects', 'regions', 'companies', 'industries', 'executives']

    Calling the method on its own

    >>> taxonomy = Taxonomy()
    >>> print(taxonomy.get_categories())
    ['news_subjects', 'regions', 'companies', 'industries', 'executives']
    """
    auth_headers = {'user-key': self.api_user.api_key}
    url = f'{const.API_HOST}{const.API_SNAPSHOTS_TAXONOMY_BASEPATH}'
    response = helper.api_send_request(method='GET',
                                       endpoint_url=url,
                                       headers=auth_headers)

    if response.status_code != 200:
        raise RuntimeError('API Request returned an unexpected HTTP status')

    names = []
    for entry in response.json()['data']:
        names.append(entry['attributes']['name'])
    return names
def _check_account_status(self):
    """Read the account's maximum allowed extracts from the API.

    Requests the account endpoint of the stream user and stores the
    ``max_allowed_extracts`` attribute of the response in
    ``self.limit_msg``.

    Raises
    ------
    RuntimeError: When the API answers with a status other than 200
    """
    stream_user = self.stream_user
    host = stream_user.get_uri_context()
    auth_headers = stream_user.get_authentication_headers()
    account_uri = f'{host}/accounts/{self.stream_user.api_key}'

    limit_response = helper.api_send_request(
        method='GET',
        endpoint_url=account_uri,
        headers=auth_headers,
    )
    if limit_response.status_code != 200:
        raise RuntimeError(''' Unexpected HTTP Response from API while checking for limits ''')

    account_info = limit_response.json()
    self.limit_msg = account_info['data']['attributes']['max_allowed_extracts']
def set_all_subscriptions(self):
    """Load the stream's information and store its subscriptions locally.

    Requests the stream identified by ``self.stream_id`` and passes the
    decoded response to ``self.create_default_subscription``.

    Returns
    -------
    None; the work is done by ``self.create_default_subscription``.

    Raises
    ------
    const.UNDEFINED_STREAM_ID_ERROR: when ``self.stream_id`` is empty
    const.UNEXPECTED_HTTP_ERROR: when the API answers with any status
        other than 200
    """
    if not self.stream_id:
        raise const.UNDEFINED_STREAM_ID_ERROR

    stream_uri = f'{self.stream_url}/{self.stream_id}'
    response = helper.api_send_request(
        method='GET',
        endpoint_url=stream_uri,
        headers=self.stream_user.get_authentication_headers(),
    )
    if response.status_code != 200:
        raise const.UNEXPECTED_HTTP_ERROR

    self.create_default_subscription(response.json())
def delete(self) -> StreamResponse:
    """Delete the stream identified by ``self.stream_id``.

    Returns
    -------
    StreamResponse built from the ``data`` entry of the JSON response and
    its optional ``links`` entry; the stream is expected to be reported as
    CANCELLED.

    Raises
    ------
    const.UNDEFINED_STREAM_ID_ERROR: when ``self.stream_id`` is empty
    RuntimeError: when the API answers 404 (the stream does not exist)
    const.UNEXPECTED_HTTP_ERROR: when the API answers with any other
        non-200 status
    """
    if not self.stream_id:
        raise const.UNDEFINED_STREAM_ID_ERROR

    uri = f'{self.stream_url}/{self.stream_id}'
    headers = self.stream_user.get_authentication_headers()
    headers['Content-Type'] = 'application/json'
    response = helper.api_send_request(
        method='DELETE',
        endpoint_url=uri,
        headers=headers,
    )

    if response.status_code == 200:
        response = response.json()
        return StreamResponse(data=response['data'],
                              links=response.get('links', None))

    # Compare the status code, not the response object itself: the previous
    # `response == 404` was never true, so this branch was unreachable.
    if response.status_code == 404:
        raise RuntimeError('The Stream does not exist')

    raise const.UNEXPECTED_HTTP_ERROR