def test_process_explain():
    """Run a full explain cycle and validate the resulting job metadata."""
    snapshot = Snapshot(query=VALID_WHERE_STATEMENT)
    assert snapshot.api_user.api_key == ENVIRONMENT_USER_KEY
    expected_base = {'query': {'where': VALID_WHERE_STATEMENT}}
    assert snapshot.query.get_base_query() == expected_base
    snapshot.process_explain()
    explain_job = snapshot.last_explain_job
    assert explain_job.document_volume > 0
    assert explain_job.job_state == const.API_JOB_DONE_STATE
    # Job IDs are UUID strings: 36 characters including hyphens.
    assert len(explain_job.job_id) == 36
def test_query_snapshot_id_parameters():
    """Creating a Snapshot with both query and snapshot_id must fail.

    The two parameters are mutually exclusive; the constructor is expected
    to raise before returning an instance.
    """
    with pytest.raises(
            Exception,
            match=r'The query and snapshot_id parameters cannot be set simultaneously'):
        # The original bound the result to an unused variable `s`; only the
        # constructor call (and its raised exception) matters here.
        Snapshot(snapshot_id=VALID_SNAPSHOT_ID, query=VALID_WHERE_STATEMENT)
def test_create_snapshot_existing_user():
    """A Snapshot built with an explicit api_user keeps that user's key."""
    snap = Snapshot(query=VALID_WHERE_STATEMENT, api_user=aku)
    assert snap.api_user.api_key == VALID_USER_KEY
    base_query = snap.query.get_base_query()
    assert base_query == {'query': {'where': VALID_WHERE_STATEMENT}}
def test_create_snapshot_evironment_variable():
    """Without an explicit user, the key comes from the environment."""
    # NOTE(review): "evironment" typo kept — renaming would change the
    # collected test id.
    snap = Snapshot(query=VALID_WHERE_STATEMENT)
    assert snap.api_user.api_key == ENVIRONMENT_USER_KEY
    base_query = snap.query.get_base_query()
    assert base_query == {'query': {'where': VALID_WHERE_STATEMENT}}
def test_create_snapshot_query_parameter():
    """A pre-built SnapshotQuery instance is accepted as the query argument."""
    prebuilt_query = SnapshotQuery(VALID_WHERE_STATEMENT)
    snap = Snapshot(query=prebuilt_query)
    assert snap.api_user.api_key == ENVIRONMENT_USER_KEY
    assert snap.query.get_base_query() == {
        'query': {'where': VALID_WHERE_STATEMENT}
    }
def test_create_snapshot_request_info():
    """request_userinfo=True populates account details on the api_user."""
    snap = Snapshot(query=VALID_WHERE_STATEMENT, request_userinfo=True)
    assert snap.api_user.account_name  # non-empty string expected
    assert snap.api_user.max_allowed_extracted_documents != 0
    assert snap.query.get_base_query() == {
        'query': {'where': VALID_WHERE_STATEMENT}
    }
from factiva.news.snapshot import Snapshot, SnapshotQuery

querystr = "publication_datetime >= '2018-01-01 00:00:00' AND publication_datetime <= '2018-01-10 00:00:00' AND LOWER(language_code) = 'en'"


def _show_queries(snap):
    """Print the explain, analytics and extract payloads for *snap*."""
    print(snap.query.get_explain_query())
    print(snap.query.get_analytics_query())
    print(snap.query.get_extract_query())


# Snapshot built straight from a where-clause string.
st1 = Snapshot(query=querystr)
_show_queries(st1)

# Snapshot built from a fully-parameterised SnapshotQuery.
query1 = SnapshotQuery(
    where=querystr,
    limit=1000,
    file_format='avro',
    frequency='YEAR',
    date_field='publication_datetime',
    group_by_source_code=True,
    top=25,
)
st2 = Snapshot(query=query1)
_show_queries(st2)
def test_existing_snapshot():
    """Loading a Snapshot by id recovers its finished extraction job."""
    snap = Snapshot(snapshot_id=VALID_SNAPSHOT_ID)
    extraction = snap.last_extraction_job
    assert extraction.job_id.endswith(VALID_SNAPSHOT_ID)
    assert extraction.job_state == const.API_JOB_DONE_STATE
    # A finished extraction must expose at least one result file.
    assert extraction.files
def test_query_value_error():
    """A list is not a valid query type and must raise ValueError."""
    with pytest.raises(ValueError,
                       match=r'Unexpected value for the query-where clause'):
        # The original bound the result to an unused variable `s`; only the
        # constructor call (and its raised exception) matters here.
        Snapshot(query=[VALID_WHERE_STATEMENT])
from datetime import datetime
from factiva.news.snapshot import Snapshot, ExplainJob

querystr = "publication_datetime >= '2018-01-01 00:00:00' AND publication_datetime <= '2018-01-10 00:00:00' AND LOWER(language_code) = 'en'"

snap = Snapshot(query=querystr)
# NOTE: to resume a previously submitted explain job instead of running a new
# one, build an ExplainJob manually (job_id, job_state, submitted_datetime,
# link, document_volume), assign it to snap.last_explain_job, then call
# snap.get_explain_job_results().
snap.process_explain()
print(snap.last_explain_job)
print('Done!')
# import json
# from datetime import datetime
from factiva.news.snapshot import Snapshot  # , ExtractionJob

querystr = "publication_datetime >= '2018-01-01 00:00:00' AND publication_datetime <= '2018-01-02 00:00:00' AND LOWER(language_code) = 'en'"

# User key is taken from the FACTIVA_APIKEY environment variable.
st1 = Snapshot(query=querystr)
# NOTE: to resume a previously submitted extraction instead, build an
# ExtractionJob manually (job_id, job_state, submitted_datetime, link),
# assign it to st1.last_extraction_job and call
# st1.get_extraction_job_results() / st1.download_extraction_files().
# A fresh end-to-end run would be: st1.process_extraction().

# Download the files of an already-completed snapshot by its id.
sn_dw = Snapshot(snapshot_id='tthb9cxch9')
sn_dw.download_extraction_files()
print(sn_dw)
print('Done!')
def test_failed_explain():
    """An invalid where clause must surface the API's ValueError."""
    snap = Snapshot(query=INVALID_WHERE_STATEMENT)
    # The original pattern was r'Unrecognized name*': the `*` quantified the
    # trailing 'e' (so it also matched 'Unrecognized nam'), not a wildcard.
    # pytest.raises uses re.search, so a plain substring is the intent.
    with pytest.raises(ValueError, match=r'Unrecognized name'):
        snap.process_explain()
from datetime import datetime
from factiva.news.snapshot import Snapshot
from factiva.news.snapshot import AnalyticsJob

querystr = "publication_datetime >= '2018-01-01 00:00:00' AND publication_datetime <= '2018-01-10 00:00:00' AND LOWER(language_code) = 'en'"

snap = Snapshot(query=querystr)
# NOTE: to resume a previously submitted analytics job instead, build an
# AnalyticsJob manually (job_id, job_state, submitted_datetime, link, data),
# assign it to snap.last_analytics_job, then call
# snap.get_analytics_job_results().
snap.process_analytics()
print(snap.last_analytics_job.data)
print('Done!')
def test_analytics_job():
    """Process analytics and verify the 2018-01 time bucket has rows.

    The original asserted ``data[...] is not None`` which is vacuously true —
    a filtered frame object is never None — so the check could not fail.
    Assert the filtered result actually contains rows instead.
    """
    snap = Snapshot(query=VALID_QUERY)
    snap.process_analytics()
    data = snap.last_analytics_job.data
    assert len(data) > 0
    # `data` is presumably a pandas DataFrame (boolean-mask indexing is used)
    # — TODO confirm against AnalyticsJob.
    jan_2018 = data[data['publication_datetime'] == '2018-01']
    assert len(jan_2018) > 0
def test_analytics_error():
    """An invalid where clause must surface the API's ValueError."""
    snap = Snapshot(query=INVALID_WHERE_STATEMENT)
    # The original pattern was r'Unrecognized name*': the `*` quantified the
    # trailing 'e' (so it also matched 'Unrecognized nam'), not a wildcard.
    # pytest.raises uses re.search, so a plain substring is the intent.
    with pytest.raises(ValueError, match=r'Unrecognized name'):
        snap.process_analytics()