def main():
    """Extract each endpoint in ALL_ENDPOINTS from Mobile Commons and load it.

    For every endpoint: build the request keyword set from module-level
    config, fetch the latest stored timestamp, count result pages, pull the
    data, and load it into the database. Endpoints whose result set is empty
    (zero pages, or a None payload) are reported and skipped.
    """
    for ENDPOINT in ALL_ENDPOINTS:
        # These endpoints are always rebuilt in full.
        full_build = True
        keywords = {
            "session": http,
            "user": MC_USER,
            "pw": MC_PWD,
            "base": URL,
            "endpoint_key": ENDPOINT_KEY,
            "api_incremental_key": API_INCREMENTAL_KEYS[ENDPOINT],
            "limit": LIMIT,
            "min_pages": MIN_PAGES,
            "max_pages": MAX_PAGES,
            "semaphore": SEMAPHORE,
            "auth": AUTH,
            "schema": SCHEMA,
            "table_prefix": TABLE_PREFIX,
            "db_incremental_key": RS_INCREMENTAL_KEYS[ENDPOINT],
        }
        tap = mc.mobile_commons_connection(ENDPOINT, full_build, **keywords)
        tap.fetch_latest_timestamp()
        tap.page_count = tap.page_count_get(**keywords, page=MIN_PAGES)
        print(
            "Kicking off extraction for endpoint {}...".format(str.upper(ENDPOINT)),
            flush=True,
            file=sys.stdout,
        )
        if tap.page_count > 0:
            print(
                "There are {} pages in the result set for endpoint {}".format(
                    tap.page_count, str.upper(ENDPOINT)
                )
            )
            data = tap.ping_endpoint(**keywords)
            # Template frame pins the expected column set; the inner join
            # keeps only columns the destination table knows about.
            template = pd.DataFrame(columns=tap.columns)
            if data is not None:
                df = pd.concat([template, data], sort=True, join="inner")
                # BUG FIX: flush/file were previously passed to str.format()
                # (silently ignored there); they belong to print().
                print(
                    "Loading data from endpoint {} into database...".format(
                        str.upper(ENDPOINT)
                    ),
                    flush=True,
                    file=sys.stdout,
                )
                tap.load(df, ENDPOINT)
            else:
                print("No new results to load for endpoint {}".format(str.upper(ENDPOINT)))
def main():
    """Two-stage extraction: pull each parent "index" endpoint, then, for
    every id it returns, pull each child endpoint in ALL_ENDPOINTS filtered
    to that id, accumulating the child frames and loading them in one pass.

    Child endpoints here do not report a page count, so it is probed first
    and then guessed via get_page_count.
    """
    for index in INDEX_SET.keys():
        # The parent index endpoint is always rebuilt in full.
        full_build = True
        keywords = {
            "session": http,
            "user": MC_USER,
            "pw": MC_PWD,
            "base": URL,
            "endpoint_key": ENDPOINT_KEY,
            "api_incremental_key": API_INCREMENTAL_KEYS[index],
            "limit": LIMIT,
            "min_pages": MIN_PAGES,
            "max_pages": MAX_PAGES,
            "semaphore": SEMAPHORE,
            "schema": SCHEMA,
            "table_prefix": TABLE_PREFIX,
            "auth": AUTH,
            "db_incremental_key": RS_INCREMENTAL_KEYS[index],
        }
        tap = mc.mobile_commons_connection(index, full_build, **keywords)
        tap.fetch_latest_timestamp()
        tap.page_count = tap.page_count_get(**keywords, page=MIN_PAGES)
        print(
            "Kicking off extraction for endpoint {}...".format(str.upper(index)),
            flush=True,
            file=sys.stdout,
        )
        data = tap.ping_endpoint(**keywords)
        template = pd.DataFrame(columns=tap.columns)
        # ROBUSTNESS: ping_endpoint can return None (the child loop below
        # already guards for this); skip the index rather than crash in
        # pd.concat / data["id"].
        if data is None:
            print("No new results to load for endpoint {}".format(str.upper(index)))
            continue
        df = pd.concat([template, data], sort=True, join="inner")
        # BUG FIX: flush/file were previously passed to str.format()
        # (silently ignored there); they belong to print().
        print(
            "Loading data from endpoint {} into database...".format(str.upper(index)),
            flush=True,
            file=sys.stdout,
        )
        tap.load(df, index)
        indices = set(data["id"])
        index_results = []
        for i in indices:
            for ENDPOINT in ALL_ENDPOINTS:
                # Child endpoints honor the FULL_REBUILD_FLAG toggle.
                full_build = str.lower(FULL_REBUILD_FLAG) == "true"
                extrakeys = {
                    "api_incremental_key": API_INCREMENTAL_KEYS[ENDPOINT],
                    "db_incremental_key": RS_INCREMENTAL_KEYS[ENDPOINT],
                    INDEX_SET[index]: i,
                }
                # BUG FIX: dict.update() returns None, so the old
                # `subkeywords = keywords.update(extrakeys)` bound a dead
                # None; the mutation of `keywords` is what matters.
                keywords.update(extrakeys)
                subtap = mc.mobile_commons_connection(ENDPOINT, full_build, **keywords)
                subtap.index = INDEX_SET[index]
                subtap.fetch_latest_timestamp()
                print(
                    "Kicking off extraction for endpoint {} CAMPAIGN {}...".format(
                        str.upper(ENDPOINT), i
                    ),
                    flush=True,
                    file=sys.stdout,
                )
                if subtap.page_count_get(**keywords, page=MIN_PAGES) > 0:
                    print("Guessing page count...")
                    subtap.page_count = subtap.get_page_count(**keywords)
                    print(
                        "There are {} pages in the result set for endpoint {} and CAMPAIGN {}".format(
                            subtap.page_count, str.upper(ENDPOINT), i
                        )
                    )
                    data = subtap.ping_endpoint(**keywords)
                    template = pd.DataFrame(columns=subtap.columns)
                    if data is not None:
                        df = pd.concat([template, data], sort=True, join="inner")
                        # Tag each child frame with the parent id it was
                        # filtered by.
                        df[INDEX_SET[index]] = str(i)
                        index_results.append(df)
                else:
                    print(
                        "No new results to load for endpoint {} CAMPAIGN {}".format(
                            str.upper(ENDPOINT), i
                        )
                    )
        # NOTE(review): the load below reuses ENDPOINT/subtap from the last
        # loop iteration — correct only if ALL_ENDPOINTS holds one endpoint
        # here; confirm against the module-level config.
        if len(index_results) > 0:
            all_results = pd.concat(index_results, sort=True, join="inner")
            print(
                "Loading data from endpoint {} into database...".format(
                    str.upper(ENDPOINT)
                ),
                flush=True,
                file=sys.stdout,
            )
            subtap.load(all_results, ENDPOINT)
        else:
            print("No new data from endpoint {}. ".format(str.upper(ENDPOINT)))
def main():
    """Two-stage extraction for the slow SENT_MESSAGES path.

    The SENT_MESSAGES endpoint is very slow, so instead of one huge pull the
    script queries sent messages per campaign: pull each parent "index"
    endpoint, then, for every campaign id it returns, pull each child
    endpoint in ALL_ENDPOINTS filtered to that id. Unlike the guessed-count
    variant, the messages endpoints report an exact page count.
    """
    for index in INDEX_SET.keys():
        # The parent index endpoint is always rebuilt in full.
        full_build = True
        keywords = {
            "session": http,
            "user": MC_USER,
            "pw": MC_PWD,
            "base": URL,
            "endpoint_key": ENDPOINT_KEY,
            "api_incremental_key": API_INCREMENTAL_KEYS[index],
            "limit": LIMIT,
            "min_pages": MIN_PAGES,
            "max_pages": MAX_PAGES,
            "semaphore": SEMAPHORE,
            "auth": AUTH,
            "schema": SCHEMA,
            "table_prefix": TABLE_PREFIX,
            "db_incremental_key": RS_INCREMENTAL_KEYS[index],
        }
        tap = mc.mobile_commons_connection(index, full_build, **keywords)
        tap.fetch_latest_timestamp()
        tap.page_count = tap.page_count_get(**keywords, page=MIN_PAGES)
        print(
            "Kicking off extraction for endpoint {}...".format(str.upper(index)),
            flush=True,
            file=sys.stdout,
        )
        data = tap.ping_endpoint(**keywords)
        template = pd.DataFrame(columns=tap.columns)
        # ROBUSTNESS: ping_endpoint can return None (the child loop below
        # already guards for this); skip the index rather than crash in
        # pd.concat / data["id"].
        if data is None:
            print("No new results to load for endpoint {}".format(str.upper(index)))
            continue
        df = pd.concat([template, data], sort=True, join="inner")
        # BUG FIX: flush/file were previously passed to str.format()
        # (silently ignored there); they belong to print().
        print(
            "Loading data from endpoint {} into database...".format(str.upper(index)),
            flush=True,
            file=sys.stdout,
        )
        tap.load(df, index)
        indices = set(data["id"])
        # Have to manually exclude the master campaign for the outgoing
        # messages endpoint because it's too slow to pull.
        indices = [str(ix) for ix in indices if str(ix) != "169115"]
        index_results = []
        for i in indices:
            for ENDPOINT in ALL_ENDPOINTS:
                # Child endpoints honor the FULL_REBUILD_FLAG toggle.
                full_build = str.lower(FULL_REBUILD_FLAG) == "true"
                extrakeys = {
                    "api_incremental_key": API_INCREMENTAL_KEYS[ENDPOINT],
                    "db_incremental_key": RS_INCREMENTAL_KEYS[ENDPOINT],
                    INDEX_SET[index]: i,
                }
                keywords.update(extrakeys)
                subtap = mc.mobile_commons_connection(ENDPOINT, full_build, **keywords)
                subtap.index = INDEX_SET[index]
                subtap.fetch_latest_timestamp()
                print(
                    "Kicking off extraction for endpoint {} CAMPAIGN {}...".format(
                        str.upper(ENDPOINT), i
                    ),
                    flush=True,
                    file=sys.stdout,
                )
                # Page count is reported exactly by the messages endpoints,
                # no need to guess.
                subtap.page_count = subtap.page_count_get(**keywords, page=MIN_PAGES)
                if subtap.page_count > 0:
                    print(
                        "There are {} pages in the result set for endpoint {} and CAMPAIGN {}".format(
                            subtap.page_count, str.upper(ENDPOINT), i
                        )
                    )
                    data = subtap.ping_endpoint(**keywords)
                    template = pd.DataFrame(columns=subtap.columns)
                    if data is not None:
                        df = pd.concat([template, data], sort=True, join="inner")
                        # Tag each child frame with the campaign id it was
                        # filtered by.
                        df[INDEX_SET[index]] = str(i)
                        index_results.append(df)
                else:
                    print(
                        "No new results to load for endpoint {} CAMPAIGN {}".format(
                            str.upper(ENDPOINT), i
                        )
                    )
        # NOTE(review): the load below reuses ENDPOINT/subtap from the last
        # loop iteration — correct only if ALL_ENDPOINTS holds one endpoint
        # here; confirm against the module-level config.
        if len(index_results) > 0:
            all_results = pd.concat(index_results, sort=True, join="inner")
            # BUG FIX: flush/file moved out of str.format() into print().
            print(
                "Loading data from endpoint {} into database...".format(
                    str.upper(ENDPOINT)
                ),
                flush=True,
                file=sys.stdout,
            )
            subtap.load(all_results, ENDPOINT)
        else:
            print("No new data from endpoint {}. ".format(str.upper(ENDPOINT)))