def test_upload_and_fetch_last_inserted():
    """
    Round-trip check for the last-inserted marker helpers.

    Uploads a value for a test document type, reads it back and expects the
    same value; then repeats with a second value for the same type, so the
    second round also verifies that the stored value is replaced.
    """
    # Point the helpers at the status table named in the poller configuration.
    table_name = POLLER_CONFIG["PollingStatusTableName"]
    os.environ["DATAPLATTFORM_POLLING_STATUS_TABLENAME"] = table_name

    doc_type = "TestingTestingType"
    for expected in ("123", "12739jldfjlka"):
        PollerUtil.upload_last_inserted_doc(expected, doc_type)
        fetched = PollerUtil.fetch_last_inserted_doc(doc_type)
        assert fetched == expected
def poll():
    """
    Poll every account in TWITTER_ACCOUNTS and forward new data points.

    For each account the stored marker (the id of the last inserted tweet) is
    read, poll_account_data is asked for data points, and each data point is
    posted to the ingest API. The marker advances to the id of every data
    point whose post returned a result, and is written back once per account.

    :return: True
    """
    api = authenticate()
    now = datetime.now()
    week_ago = (now - timedelta(days=7)).date()

    for account, marker_name in TWITTER_ACCOUNTS.items():
        stored_marker = PollerUtil.fetch_last_inserted_doc(marker_name)
        # Fall back to 1 when no marker has been stored yet.
        newest_id = int(stored_marker) if stored_marker else 1

        tweets = poll_account_data(api, account, newest_id, week_ago)
        for tweet in tweets:
            response = PollerUtil.post_to_ingest_api(tweet, TWITTER_ACCOUNT_TYPE)
            if response is None:
                # The post yielded no result; leave the marker where it is.
                continue
            newest_id = tweet['id']

        PollerUtil.upload_last_inserted_doc(newest_id, marker_name)

    return True
def poll():
    """
    Poll daily statistics for every organisation / API type combination.

    Each combination has its own marker document, named from the API type
    followed by the organisation name with spaces removed and upper-cased.
    Data points are posted to the ingest API; the marker advances to the
    ``data.timeRange.end`` value of every data point whose post returned a
    result, and is written back after the data points have been processed.

    :return: True
    """
    client = authenticate()

    for org_name, org in LINKEDIN_ORGS.items():
        compact_org_name = org_name.replace(" ", "").upper()

        for api_type, api in LINKEDIN_APIS.items():
            marker_name = api_type + compact_org_name
            stored_marker = PollerUtil.fetch_last_inserted_doc(marker_name)
            # Start from 0 when no marker has been stored yet.
            newest_timestamp = int(stored_marker) if stored_marker else 0

            stats = poll_daily_stats_data(client, org, org_name, api,
                                          newest_timestamp)
            if not stats:
                # No data for this API type: the remaining API types of this
                # organisation are skipped as well.
                break

            for stat in stats:
                response = PollerUtil.post_to_ingest_api(stat, api_type)
                if response is not None:
                    newest_timestamp = stat['data']['timeRange']['end']

            PollerUtil.upload_last_inserted_doc(newest_timestamp, marker_name)

    return True
def poll():
    """
    Fetch forecasts for the configured location and post them to the ingest API.

    The location is read from the DATAPLATTFORM_YR_LOCATION environment
    variable, with a built-in default. The stored marker is passed to
    get_yr_data, advances to the ``time_from`` value of every forecast whose
    post returned a result, and is written back at the end.

    :return: True
    """
    stored_marker = PollerUtil.fetch_last_inserted_doc(YR_TYPE)
    # Start from 0 when no marker has been stored yet.
    newest_timestamp = int(stored_marker) if stored_marker else 0

    location = os.getenv("DATAPLATTFORM_YR_LOCATION",
                         "Norway/Oslo/Oslo/Lakkegata")

    for forecast in get_yr_data(location, newest_timestamp):
        response = PollerUtil.post_to_ingest_api(forecast, YR_TYPE)
        if response is None:
            continue
        newest_timestamp = forecast["time_from"]

    PollerUtil.upload_last_inserted_doc(newest_timestamp, YR_TYPE)
    return True
def poll():
    """
    Fetch UBW data and insert the entries that should be ingested.

    Each fetched entry is checked against the stored marker with
    should_upload_ingest; accepted entries go through insert_new_ubw_data.
    When that returns a value, it becomes the new marker, which is then also
    used for the remaining entries. The marker is written back at the end.

    :return: True
    """
    marker = PollerUtil.fetch_last_inserted_doc(UBW_TYPE)

    for entry in fetch_ubw_data():
        if not should_upload_ingest(entry, marker):
            continue
        inserted = insert_new_ubw_data(entry)
        if inserted is not None:
            marker = inserted

    PollerUtil.upload_last_inserted_doc(marker, UBW_TYPE)
    return True
def get_commits(repo):
    """
    Return the commits of ``repo`` fetched since the stored marker.

    The marker stored under ``"BitbucketType" + repo["slug"]`` is sent as the
    ``since`` query parameter. Every commit is trimmed and normalised in
    place, and the result is ordered by committer timestamp, oldest first.
    """
    marker = PollerUtil.fetch_last_inserted_doc("BitbucketType" + repo["slug"])
    commits = get_all_pages(f"{get_repo_url(repo)}/commits",
                            params={"since": marker})

    for entry in commits:
        for field in ("committer", "author"):
            del entry[field]
        entry["repo"] = repo
        # Bitbucket timestamps carry extra trailing digits (presumably they
        # are in milliseconds); drop them to get valid unix timestamps.
        for stamp in ("authorTimestamp", "committerTimestamp"):
            entry[stamp] //= 1000

    # Make sure commits are in order with oldest first and newest last.
    return sorted(commits, key=lambda entry: entry["committerTimestamp"])
def poll():
    """
    Post blog entries that pass should_upload_ingest to the ingest API.

    The blog HTML is fetched and converted to docs. The id of the first doc
    is treated as the most recent one; docs are posted in order until the
    first one that should_upload_ingest rejects. The stored marker is then
    replaced by the most recent id if it differs.

    When no docs could be created there is nothing to post and no id to
    store, so the function returns without touching the stored marker
    (indexing ``docs[0]`` would otherwise raise IndexError).

    :return: True once the run has completed.
    """
    # The stored value is the "most recent" id recorded by a previous run.
    last_inserted_doc = PollerUtil.fetch_last_inserted_doc(KNOWITLABS_TYPE)

    html = get_html_from_blog()
    medium_data = get_medium_data_dict(html)
    docs = create_docs(medium_data)
    if not docs:
        # Empty result: keep the existing marker and finish quietly.
        return True

    # NOTE(review): assumes create_docs returns the newest entry first - confirm.
    most_recent = docs[0]["id"]

    for doc in docs:
        if not should_upload_ingest(doc, last_inserted_doc):
            # Stop at the first doc that is rejected; later docs are not checked.
            break
        PollerUtil.post_to_ingest_api(doc, KNOWITLABS_TYPE)

    if last_inserted_doc != most_recent:
        PollerUtil.upload_last_inserted_doc(most_recent, KNOWITLABS_TYPE)
    return True
def poll():
    """
    Poll statistics for every organisation, at most once per calendar day.

    The stored marker is parsed with DATETIME_FORMAT; if its date is today or
    later, nothing is polled. Otherwise every data point returned by
    poll_stats_data is unpacked: each key is used as the document type and
    its value as the payload posted to the ingest API. Finally today's date
    is stored as the new marker.

    :return: False when the stored date is not before today, otherwise True.
    """
    today = datetime.now().date()

    stored_marker = PollerUtil.fetch_last_inserted_doc(DOC_TYPE)
    if stored_marker:
        previous_run = datetime.strptime(str(stored_marker),
                                         DATETIME_FORMAT).date()
        if previous_run >= today:
            return False

    client = authenticate()
    for org_name, org in LINKEDIN_ORGS.items():
        for data_point in poll_stats_data(client, org, org_name):
            for doc_type, payload in data_point.items():
                PollerUtil.post_to_ingest_api(payload, doc_type)

    PollerUtil.upload_last_inserted_doc(str(today), DOC_TYPE)
    return True
def poll():
    """
    Run the search poll and forward new data points to the ingest API.

    The stored marker (the id of the last inserted tweet) is passed to
    poll_search_data. Each returned data point is posted; the marker advances
    to the id of every data point whose post returned a result and is written
    back at the end.

    :return: True
    """
    api = authenticate()

    stored_marker = PollerUtil.fetch_last_inserted_doc(TWITTER_SEARCH_TYPE)
    # Fall back to 1 when no marker has been stored yet.
    newest_id = int(stored_marker) if stored_marker else 1

    for tweet in poll_search_data(api, newest_id):
        response = PollerUtil.post_to_ingest_api(tweet, TWITTER_SEARCH_TYPE)
        if response is None:
            continue
        newest_id = tweet['id']

    PollerUtil.upload_last_inserted_doc(newest_id, TWITTER_SEARCH_TYPE)
    return True