Example #1
def main():
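    # NOTE: this snippet assumes module-level imports (sys, pandas as pd, and the
    # mc wrapper module) plus config constants (http, MC_USER, MC_PWD, URL,
    # ENDPOINT_KEY, ALL_ENDPOINTS, API_INCREMENTAL_KEYS, RS_INCREMENTAL_KEYS,
    # LIMIT, MIN_PAGES, MAX_PAGES, SEMAPHORE, AUTH, SCHEMA, TABLE_PREFIX)
    # defined elsewhere in the source script.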

    for ENDPOINT in ALL_ENDPOINTS:

        full_build = True

        keywords = {
            "session": http,
            "user": MC_USER,
            "pw": MC_PWD,
            "base": URL,
            "endpoint_key": ENDPOINT_KEY,
            "api_incremental_key": API_INCREMENTAL_KEYS[ENDPOINT],
            "limit": LIMIT,
            "min_pages": MIN_PAGES,
            "max_pages": MAX_PAGES,
            "semaphore": SEMAPHORE,
            "auth": AUTH,
            "schema": SCHEMA,
            "table_prefix": TABLE_PREFIX,
            "db_incremental_key": RS_INCREMENTAL_KEYS[ENDPOINT],
        }

        tap = mc.mobile_commons_connection(ENDPOINT, full_build, **keywords)
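        # Read the latest stored timestamp for incremental pulls, then probe
        # page MIN_PAGES to size the result set.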
        tap.fetch_latest_timestamp()
        tap.page_count = tap.page_count_get(**keywords, page=MIN_PAGES)

        print(
            "Kicking off extraction for endpoint {}...".format(str.upper(ENDPOINT)),
            flush=True,
            file=sys.stdout,
        )

        if tap.page_count > 0:

            print(
                "There are {} pages in the result set for endpoint {}".format(
                    tap.page_count, str.upper(ENDPOINT)
                )
            )

            data = tap.ping_endpoint(**keywords)
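            # An empty template frame restricts the inner-join concat below to
            # the expected column set.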
            template = pd.DataFrame(columns=tap.columns)

            if data is not None:

                df = pd.concat([template, data], sort=True, join="inner")
                print(
                    "Loading data from endpoint {} into database...".format(
                        str.upper(ENDPOINT)
                    ),
                    flush=True,
                    file=sys.stdout,
                )
                tap.load(df, ENDPOINT)

        else:

            print("No new results to load for endpoint {}".format(str.upper(ENDPOINT)))

Example #2

def main():
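    # NOTE: as in Example #1, module-level imports and config constants
    # (including INDEX_SET and FULL_REBUILD_FLAG) are assumed to be defined
    # elsewhere in the source script.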

    for index in INDEX_SET.keys():

        full_build = True

        keywords = {
            "session": http,
            "user": MC_USER,
            "pw": MC_PWD,
            "base": URL,
            "endpoint_key": ENDPOINT_KEY,
            "api_incremental_key": API_INCREMENTAL_KEYS[index],
            "limit": LIMIT,
            "min_pages": MIN_PAGES,
            "max_pages": MAX_PAGES,
            "semaphore": SEMAPHORE,
            "schema": SCHEMA,
            "table_prefix": TABLE_PREFIX,
            "auth": AUTH,
            "db_incremental_key": RS_INCREMENTAL_KEYS[index],
        }

        tap = mc.mobile_commons_connection(index, full_build, **keywords)
        tap.fetch_latest_timestamp()
        tap.page_count = tap.page_count_get(**keywords, page=MIN_PAGES)

        print(
            "Kicking off extraction for endpoint {}...".format(
                str.upper(index)),
            flush=True,
            file=sys.stdout,
        )

        data = tap.ping_endpoint(**keywords)
        template = pd.DataFrame(columns=tap.columns)
        df = pd.concat([template, data], sort=True, join="inner")

        print(
            "Loading data from endpoint {} into database...".format(str.upper(index)),
            flush=True,
            file=sys.stdout,
        )

        tap.load(df, index)

        indices = set(data["id"])
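        # Fan out: for each id in the parent result set, query every child
        # endpoint scoped to that campaign.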
        index_results = []

        for i in indices:

            for ENDPOINT in ALL_ENDPOINTS:

                full_build = str.lower(FULL_REBUILD_FLAG) == "true"

                extrakeys = {
                    "api_incremental_key": API_INCREMENTAL_KEYS[ENDPOINT],
                    "db_incremental_key": RS_INCREMENTAL_KEYS[ENDPOINT],
                    INDEX_SET[index]: i,
                }

                keywords.update(extrakeys)  # update() mutates keywords in place and returns None
                subtap = mc.mobile_commons_connection(ENDPOINT, full_build,
                                                      **keywords)
                subtap.index = INDEX_SET[index]
                subtap.fetch_latest_timestamp()

                print(
                    "Kicking off extraction for endpoint {} CAMPAIGN {}...".
                    format(str.upper(ENDPOINT), i),
                    flush=True,
                    file=sys.stdout,
                )

                if subtap.page_count_get(**keywords, page=MIN_PAGES) > 0:

                    print("Guessing page count...")
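                    # These endpoints do not return a total page count in the
                    # response, so it has to be estimated (the messages endpoints
                    # in Example #3 return it directly).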

                    subtap.page_count = subtap.get_page_count(**keywords)

                    print(
                        "There are {} pages in the result set for endpoint {} and CAMPAIGN {}"
                        .format(subtap.page_count, str.upper(ENDPOINT), i))

                    data = subtap.ping_endpoint(**keywords)
                    template = pd.DataFrame(columns=subtap.columns)

                    if data is not None:

                        df = pd.concat([template, data],
                                       sort=True,
                                       join="inner")
                        df[INDEX_SET[index]] = str(i)
                        index_results.append(df)

                else:

                    print("No new results to load for endpoint {} CAMPAIGN {}".
                          format(str.upper(ENDPOINT), i))

        if len(index_results) > 0:

            all_results = pd.concat(index_results, sort=True, join="inner")

            print(
                "Loading data from endpoint {} into database...".format(str.upper(ENDPOINT)),
                flush=True,
                file=sys.stdout,
            )

            subtap.load(all_results, ENDPOINT)

        else:

            print(
                "No new data from endpoint {}.".format(str.upper(ENDPOINT)),
                flush=True,
                file=sys.stdout,
            )
Example #3
def main():
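    # NOTE: same module-level imports and config constants as in the examples above.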

    # The SENT_MESSAGES endpoint is very slow; a quicker workaround is to
    # query sent messages for each campaign instead.

    for index in INDEX_SET.keys():

        full_build = True

        keywords = {
            "session": http,
            "user": MC_USER,
            "pw": MC_PWD,
            "base": URL,
            "endpoint_key": ENDPOINT_KEY,
            "api_incremental_key": API_INCREMENTAL_KEYS[index],
            "limit": LIMIT,
            "min_pages": MIN_PAGES,
            "max_pages": MAX_PAGES,
            "semaphore": SEMAPHORE,
            "auth": AUTH,
            "schema": SCHEMA,
            "table_prefix": TABLE_PREFIX,
            "db_incremental_key": RS_INCREMENTAL_KEYS[index],
        }

        tap = mc.mobile_commons_connection(index, full_build, **keywords)
        tap.fetch_latest_timestamp()
        tap.page_count = tap.page_count_get(**keywords, page=MIN_PAGES)

        print(
            "Kicking off extraction for endpoint {}...".format(
                str.upper(index)),
            flush=True,
            file=sys.stdout,
        )

        data = tap.ping_endpoint(**keywords)
        template = pd.DataFrame(columns=tap.columns)
        df = pd.concat([template, data], sort=True, join="inner")

        print(
            "Loading data from endpoint {} into database...".format(str.upper(index)),
            flush=True,
            file=sys.stdout,
        )

        tap.load(df, index)
        indices = set(data["id"])
        # Manually exclude the master campaign for the outgoing messages
        # endpoint because it is too slow.
        indices = [str(ix) for ix in indices if str(ix) != "169115"]
        index_results = []

        for i in indices:

            for ENDPOINT in ALL_ENDPOINTS:

                full_build = str.lower(FULL_REBUILD_FLAG) == "true"

                extrakeys = {
                    "api_incremental_key": API_INCREMENTAL_KEYS[ENDPOINT],
                    "db_incremental_key": RS_INCREMENTAL_KEYS[ENDPOINT],
                    INDEX_SET[index]: i,
                }

                keywords.update(extrakeys)  # merge per-campaign keys in place
                subtap = mc.mobile_commons_connection(ENDPOINT, full_build,
                                                      **keywords)
                subtap.index = INDEX_SET[index]
                subtap.fetch_latest_timestamp()

                print(
                    "Kicking off extraction for endpoint {} CAMPAIGN {}...".
                    format(str.upper(ENDPOINT), i),
                    flush=True,
                    file=sys.stdout,
                )

                # Page count is included in the results for the messages
                # endpoints, so there is no need to guess.

                subtap.page_count = subtap.page_count_get(**keywords,
                                                          page=MIN_PAGES)

                if subtap.page_count > 0:

                    print(
                        "There are {} pages in the result set for endpoint {} and CAMPAIGN {}"
                        .format(subtap.page_count, str.upper(ENDPOINT), i))

                    data = subtap.ping_endpoint(**keywords)
                    template = pd.DataFrame(columns=subtap.columns)

                    if data is not None:

                        df = pd.concat([template, data],
                                       sort=True,
                                       join="inner")
                        df[INDEX_SET[index]] = str(i)
                        index_results.append(df)

                else:

                    print("No new results to load for endpoint {} CAMPAIGN {}".
                          format(str.upper(ENDPOINT), i))

        if len(index_results) > 0:

            all_results = pd.concat(index_results, sort=True, join="inner")
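            # Combine every per-campaign frame and load the result in a single pass.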

            print(
                "Loading data from endpoint {} into database...".format(str.upper(ENDPOINT)),
                flush=True,
                file=sys.stdout,
            )

            subtap.load(all_results, ENDPOINT)

        else:

            print(
                "No new data from endpoint {}.".format(str.upper(ENDPOINT)),
                flush=True,
                file=sys.stdout,
            )