Example #1
def getWorstAndBestPerformers(df, feed, target_cpa):
    if target_cpa is None:
        return
    worst_performer = getWorstPerformer(df, target_cpa)
    best_performer = getBestPerformer(df, target_cpa)

    if worst_performer is None and best_performer is None:
        return
    if worst_performer is None:
        this_feed = best_performer
    elif best_performer is None:
        this_feed = worst_performer
    else:
        this_feed = worst_performer.append(best_performer)

    if functions.dfIsEmpty(this_feed):
        return

    this_feed["created_at"] = datetime.now()
    this_feed["updated_at"] = datetime.now()
    this_feed["display_from_date"] = datetime.now()

    cols = ["id", "created_at", "updated_at", "account_id", "priority",
            "headline", "message", "suggestion", "display_from_date", "advert_id", "date_range"]

    this_feed = this_feed[cols]

    if functions.dfIsEmpty(feed):
        return this_feed

    return feed.append(this_feed)
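
Every example in this listing gates on functions.dfIsEmpty before touching a frame. The helper itself is not shown; a minimal sketch of what it presumably does (assumed behaviour: None and zero-row frames count as empty):

import pandas as pd

def dfIsEmpty(df):
    # Assumed helper: treat None and zero-row DataFrames/Series as empty.
    if df is None:
        return True
    if isinstance(df, (pd.DataFrame, pd.Series)):
        return df.empty
    return False
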
Example #2
def getWorstAndBestPerformers(df, feed, target_cpa):
    worst_performer = getWorstPerformer(df, target_cpa)
    best_performer = getBestPerformer(df, target_cpa)

    this_feed = worst_performer.append(best_performer)

    if functions.dfIsEmpty(this_feed):
        return

    this_feed["created_at"] = datetime.now()
    this_feed["updated_at"] = datetime.now()
    this_feed["display_from_date"] = datetime.now()
    this_feed["search_query_n_gram_id"] = this_feed["id"]

    cols = [
        "id", "created_at", "updated_at", "account_id", "priority", "headline",
        "message", "suggestion", "display_from_date", "n_gram", "date_range",
        "search_query_n_gram_id"
    ]
    this_feed = this_feed[cols]

    if functions.dfIsEmpty(feed):
        return this_feed

    return feed.append(this_feed)
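
getWorstPerformer and getBestPerformer are not included in this listing. Judging from the call sites (they take a performance frame plus a target CPA, and Example #1 expects either to return None), they plausibly look something like the sketch below; the column name and ranking rule are guesses:

def getWorstPerformer(df, target_cpa):
    # Hypothetical: the row whose CPA overshoots target_cpa the most.
    candidates = df[df["cpa"] > target_cpa]
    if candidates.shape[0] == 0:
        return None
    return candidates.sort_values("cpa", ascending=False).head(1)


def getBestPerformer(df, target_cpa):
    # Hypothetical: the converting row that beats target_cpa by the widest margin.
    candidates = df[(df["cpa"] > 0) & (df["cpa"] < target_cpa)]
    if candidates.shape[0] == 0:
        return None
    return candidates.sort_values("cpa", ascending=True).head(1)
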
def main(df, account_id, settings):
    Log("info", "processing campaign winning element", "", account_id)

    if functions.dfIsEmpty(df):
        return

    # add the campaign id, add path_1_path_2
    df = processDf(df, account_id, settings)

    lines = ["headline_1", "headline_2", "description", "path_1_path_2"]

    ldf = None
    # start lines loop
    for line in lines:
        this_df = df[["clicks", "impressions", "campaign_id", line]].groupby(["campaign_id", line]).sum()
        this_df["ctr"] = (this_df.clicks / this_df.impressions) * 100
        if functions.dfIsEmpty(this_df):
            continue
        campaignIds = list(this_df.index.levels[0])
        this_df = this_df.reset_index()

        # start campaign ids loop
        for i, campaign_id in enumerate(campaignIds):
            # if i!=5:continue
            tdf = this_df.copy()
            quantile = tdf[(tdf.campaign_id == campaign_id)].impressions.quantile()  # median (0.5 quantile)
            tdf = tdf[(tdf.campaign_id == campaign_id) & (tdf.impressions > quantile) & (tdf[line] != "/")].sort_values(
                "ctr", ascending=False).head(3).reset_index(drop=True)

            if tdf.shape[0] == 0:
                continue

            tdf["order"] = tdf.index + 1
            tdf["type"] = line
            tdf.rename(columns={line: "value"}, inplace=True)
            tdf = tdf[["campaign_id", "value", "order", "type"]]

            try:
                ldf = ldf.append(tdf)
            except AttributeError:
                # first iteration: ldf is still None
                ldf = tdf.copy()

    if functions.dfIsEmpty(ldf):
        return

    ldf = ldf.reset_index(drop=True)
    ldf.value = ldf.value.str.replace(" --", "")
    ldf["id"] = pd.Series([uuid.uuid1() for i in range(len(ldf))]).astype(str)
    # now let's add created_at and updated_at as today
    ldf["created_at"] = datetime.now()
    ldf["updated_at"] = datetime.now()
    # for date_range in settings.date_ranges:
    date_range = "last_30_days"
    ldf["date_range"] = date_range
    ldf.to_sql("campaign_winning_elements", settings.createEngine(), if_exists='append', index=False)
    return df
def createFullDataFrame(account_id, settings):
    df = None
    for date_range in settings.date_ranges:

        this_df = processDateRange(date_range, account_id, settings)

        if functions.dfIsEmpty(this_df):
            continue

        if functions.dfIsEmpty(df):
            df = this_df.copy()
        else:
            df = df.append(this_df.copy())

    return df
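
createFullDataFrame (and most loops in this listing) accumulates rows with DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0. On current pandas the same accumulation is usually written by collecting chunks in a list and concatenating once; an equivalent sketch:

import pandas as pd

def createFullDataFrame(account_id, settings):
    # collect one frame per date range, then concatenate in a single pass
    chunks = []
    for date_range in settings.date_ranges:
        this_df = processDateRange(date_range, account_id, settings)
        if functions.dfIsEmpty(this_df):
            continue
        chunks.append(this_df)
    return pd.concat(chunks, ignore_index=True) if chunks else None
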
Example #5
def main(account_id):
    Log("info", "getting keyword performance from the api", "", account_id)

    report = Report(account_id, "", options)

    df = report.createDfWithAllDateRanges(account_id)

    if functions.dfIsEmpty(df):
        return

    # print df[df.google_id=="309491001346"].cpc_bid

    # remember column headers are as per the download here
    df["keyword_id"] = df.apply(lambda row: functions.addIdToDf(
        account_id, row["Keyword ID"], row["Ad group ID"]),
                                axis=1)
    df["id"] = df["keyword_id"]

    df = addParentId(df, account_id)  # our UUID from the keywords table

    df = report.basicProcessing(df)

    df = reportSpecificProcessing(df)

    report.writeToEntitiesTable(df, report, account_id)

    report.writeToPerformanceTable(df, report, account_id)
    def main(self):
        """Populate the adverts and advert_performance tables"""
        Log("info", "populating the adverts and advert_performance tables", "",
            self.account_id)

        settings = Settings()
        for date_range in settings.date_ranges:

            df_chunks = self.dataFrameFromAdPerformanceReports(date_range)
            self.deleteExisting(date_range)

            while True:

                try:
                    df = next(df_chunks)
                except StopIteration:
                    break

                if functions.dfIsEmpty(df):
                    continue

                try:
                    df = self.processDf(df, date_range)
                except Exception as exception:
                    Log("error", str(exception), traceback.format_exc())
                    raise

                df = self.addInfo(df)

                self.writeToAdvertPerformanceTable(df, date_range)
Example #7
    def addUserAccounts(self, user_id, first_run):

        if not Helpers().isActiveUser(user_id):
            Log("info", "this user isn't active. Exiting",
                'user_id: %s' % (user_id))
            return

        try:

            Log("info", "adding accounts for user id '%s'" % user_id)
            self.user_id = user_id
            accounts_df = self.getAccountsDf()
            if functions.dfIsEmpty(accounts_df):
                return
            accounts_df = accounts_df.drop_duplicates('google_id')
            accounts_df = self.dropDuplicates(accounts_df, first_run)
            if (accounts_df.shape[0] == 0 and first_run):
                Log('warning',
                    "no unique google accounts were found for this user",
                    "user_id (%s)" % (user_id), "")
            accounts_df.to_sql("accounts",
                               Database().createEngine(),
                               index=False,
                               if_exists="append")
        except Exception as exception:
            Log("error",
                str(exception) + " (User id: %s)" % (user_id),
                traceback.format_exc())

        Log("info", "finished adding account meta data")
def filterAndAddColumns(df, date_range, account_id):
    # multiply percentages by 100
    df.ctr = df.ctr * 100
    df.conversion_rate = df.conversion_rate * 100
    df.roas = df.roas * 100

    # filter down. Must have at least 10 clicks
    df = df[df.clicks > 10]

    df = df.reset_index().fillna("")

    if functions.dfIsEmpty(df):
        return

    df["n_gram"] = df.level_0 + " " + df.level_1 + " " + df.level_2

    df["n_gram"] = df["n_gram"].str.strip()

    df = df.reset_index(drop=True)

    df["account_id"] = account_id
    df["date_range"] = date_range
    df["id"] = pd.Series([uuid.uuid1() for i in range(len(df))]).astype(str)
    # now let's add created_at and updated_at as today
    df["created_at"] = datetime.now()
    df["updated_at"] = datetime.now()

    return df
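
The level_0/level_1/level_2 columns exist because addNGrams (shown later in this listing) keys its frame by bigram and trigram tuples; in the resulting mixed MultiIndex a bigram's missing third level is NaN, which reset_index().fillna("") turns into an empty string before the parts are joined. A tiny demonstration mirroring that construction:

import pandas as pd

data = {("red", "shoes"): {"clicks": 3},
        ("red", "running", "shoes"): {"clicks": 5}}
df = pd.DataFrame(data).T  # rows keyed by 2- and 3-word tuples
df = df.reset_index().fillna("")
df["n_gram"] = (df.level_0 + " " + df.level_1 + " " + df.level_2).str.strip()
print(df["n_gram"].tolist())  # ['red shoes', 'red running shoes']
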
Example #9
def main(account_id):
    Log("info", "processing keyword feed", "", account_id)

    target_cpa = functions.getTargetCpa(account_id)

    table_name = "keyword_feed"

    deleteAccountDataFromTable(account_id, table_name, settings)

    feed = None
    for date_range in settings.date_ranges:
        df = getKeywords(account_id, date_range)
        df = df.loc[:, ~df.columns.duplicated()]
        # print date_range
        # print df.shape

        # to check for duplicates
        df["keyword_and_type"] = df["keyword_text"] + df["keyword_match_type"]

        if functions.dfIsEmpty(df):
            continue

        if not functions.dfIsEmpty(feed):
            feed["keyword_and_type"] = feed["keyword_text"] + feed[
                "keyword_match_type"]
            df = df[~df["keyword_and_type"].isin(feed["keyword_and_type"].
                                                 values)]

        feed = getWorstAndBestPerformers(df, feed, target_cpa)

        if functions.dfIsEmpty(feed):
            continue

        # drop duplicate columns
        # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
        feed = feed.loc[:, ~feed.columns.duplicated()]

    if functions.dfIsEmpty(feed):
        return

    if "keyword_and_type" in feed.columns:
        del feed["keyword_and_type"]

    feed['display_from_date'] = datetime.now()

    functions.append_df_to_sql_table(feed, table_name)
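
functions.append_df_to_sql_table is not shown here; given the explicit to_sql calls elsewhere in this listing, it is presumably a thin wrapper along these lines (a sketch, not the actual helper):

def append_df_to_sql_table(df, table_name):
    # assumed wrapper mirroring the to_sql calls used directly in other examples
    df.to_sql(table_name, settings.createEngine(), if_exists="append", index=False)
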
Example #10
def main(account_id):
    Log("info", "processing search query n-grams feed", "", account_id)

    target_cpa = functions.getTargetCpa(account_id)

    engine = settings.createEngine()

    deleteAllFromTable("search_query_n_gram_feed", account_id, engine)

    feed = None
    for date_range in settings.date_ranges:
        print(date_range)
        df = getQueryPerformance(account_id, date_range)
        df = df.loc[:, ~df.columns.duplicated()]

        print(date_range)
        print(df.shape)

        if functions.dfIsEmpty(df):
            continue

        if not functions.dfIsEmpty(feed):
            df = df[~df["n_gram"].isin(feed["n_gram"].values)]

        feed = getWorstAndBestPerformers(df, feed, target_cpa)

        if functions.dfIsEmpty(feed):
            continue

        # drop duplicate columns
        # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
        feed = feed.loc[:, ~feed.columns.duplicated()]

    if functions.dfIsEmpty(feed):
        return

    table_name = "search_query_n_gram_feed"

    feed = functions.createUuid(feed)
    feed = functions.trimDfToTableColumns(feed, table_name)

    feed['display_from_date'] = datetime.now()

    functions.append_df_to_sql_table(feed, table_name)
Example #11
def main(account_id):
    Log("info", "processing budget commander data", "", account_id)

    account_performance_by_day = getAccountPerformanceByDay(
        "account_performance_reports", account_id, date_range)

    if functions.dfIsEmpty(account_performance_by_day):
        return

    addBudgetActualGraphDataToDb(account_performance_by_day, account_id)
    def processDf(self, df, date_range):

        if functions.dfIsEmpty(df):
            return

        df = functions.addCalculatedMetricsToDataFrame(df)

        df = self.addSignificance(df)

        return df
def getFeedInfoAllDateRanges(account_id):
    all_df = None

    for date_range in settings.date_ranges:

        df = getAdGroups(account_id, date_range)

        if functions.dfIsEmpty(df):
            print("%s is empty" % (date_range))
            continue

        df = addFeedInfo(df)

        if functions.dfIsEmpty(all_df):
            all_df = df.copy()
        else:
            all_df = all_df.append(df.copy())

    return all_df
def main(account_id):
    Log("info", "processing ad n grams feed", "", account_id)

    target_cpa = functions.getTargetCpa(account_id)

    engine = settings.createEngine()
    table_name = "ad_n_gram_feed"
    functions.deleteAllFromTable(table_name, account_id, engine)

    feed = None
    for date_range in settings.date_ranges:
        df = getNGrams(account_id, date_range)
        df = df.loc[:, ~df.columns.duplicated()]

        if functions.dfIsEmpty(df):
            print("%s is empty" % (date_range))
            continue

        if not functions.dfIsEmpty(feed):
            df = df[~df["n_gram_id"].isin(feed["n_gram_id"].values)]

        feed = getWorstAndBestPerformers(df, feed, target_cpa)

        if functions.dfIsEmpty(feed):
            continue

        # drop duplicate columns
        # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
        feed = feed.loc[:, ~feed.columns.duplicated()]

    if functions.dfIsEmpty(feed):
        print("ad_n_gram_feed is empty")
        return

    print("ad_n_gram_feed length: %s" % (feed.shape[0]))

    feed = functions.createUuid(feed)
    feed = functions.trimDfToTableColumns(feed, table_name)

    feed['display_from_date'] = datetime.now()

    writeToAdvertFeed(feed, table_name, engine)
def main(account_id):
    Log("info", "getting ad performance from the api", "", account_id)

    settings = Settings()
    for date_range in settings.date_ranges:

        report = Report(account_id, date_range, options)

        report.createAccountDirectory()

        report.createReportDirectory()

        report.downloadReport(account_id, options["where_string"])

        df = report.convertCsvToDataframe()

        if functions.dfIsEmpty(df):
            continue

        df = report.basicProcessing(df)

        df = reportSpecificProcessing(df, date_range, account_id)

        deleteExitingData(account_id, date_range, 'ad_performance_reports')

        report.writeDataframeToTable(df, 'ad_performance_reports')

        deleteExitingData(account_id, date_range, 'advert_performance')
        report.writeDataframeToTable(df, 'advert_performance')

    # df still holds the frame from the final date range processed above
    if functions.dfIsEmpty(df):
        return

    if 'advert_id' not in df.columns:
        Log('error', 'advert_id not in df columns', df.columns, account_id)
        return

    df["id"] = df["advert_id"]
    df = addParentId(df, account_id)  # our UUID from the adverts table
    report.writeToEntitiesTable(
        df, report, account_id)  # add the final date range data to adverts
    def reduceBids(self):
        """New bid decision making
        Returns a df with the new bids"""

        df = self.getKeywordsDataframe()
        if functions.dfIsEmpty(df):
            Log('info', "no keywords found. Can't change bids", '',
                self.account_id)
            return

        remaining_spend_forecast = self.total_spend_forecast - float(
            self.this_month_spend)
        spend_vs_remaining_budget_percentage = self.remaining_budget / remaining_spend_forecast

        def updateBid(cpc_bid, reduction_percentage, min_bid=0.1):
            """Df lambda function
            Reduce bid by percentage.
            Accepts reduction_percentage as whole number e.g. 98.
            Checks a min bid limit (optional)
            """
            try:
                cpc_bid = float(cpc_bid)
                cpc_bid = cpc_bid * ((100 - reduction_percentage) / 100)
                if cpc_bid < min_bid:
                    cpc_bid = min_bid
                return cpc_bid
            except ValueError:
                return cpc_bid

        def updateForecast(row):
            """Df lambda function"""
            try:
                reduction = row["new_bid"] / row["cpc_bid"]
            except TypeError:
                return float(row["cpc_bid"])
            return float(((row["cost"] * reduction) / 7) *
                         self.local_dates.days_remaining_in_this_month)

        start_reduction = 10 - int(spend_vs_remaining_budget_percentage * 10)
        for i in range(start_reduction, 10):
            reduction_percentage = i * 5
            # print reduction_percentage
            df["new_bid"] = df["cpc_bid"].apply(
                lambda cpc_bid: updateBid(cpc_bid, reduction_percentage))
            df["forecast"] = df[["cpc_bid", "new_bid", "cost"
                                 ]].apply(lambda row: updateForecast(row),
                                          axis=1)
            # print "Forecast: %s" %(df.forecast.sum())
            if df.forecast.sum() <= self.remaining_budget:
                break
        return df
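
The schedule in reduceBids starts gentler the healthier the budget looks: start_reduction maps the spend ratio onto the first reduction step, and each loop pass adds 5 percentage points until the forecast fits the remaining budget. A worked example with assumed numbers:

# Assumed: remaining budget covers 60% of the forecast spend
spend_vs_remaining_budget_percentage = 0.6
start_reduction = 10 - int(spend_vs_remaining_budget_percentage * 10)  # 10 - 6 = 4
print([i * 5 for i in range(start_reduction, 10)])  # [20, 25, 30, 35, 40, 45]
# i.e. try a 20% cut first, escalating to 45% if the forecast still overshoots
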
Example #17
def getWorstAndBestPerformers(df, feed, target_cpa):
    worst_performer = getWorstPerformer(df, target_cpa)
    best_performer = getBestPerformer(df, target_cpa)

    this_feed = worst_performer.append(best_performer)

    this_feed["created_at"] = datetime.now()
    this_feed["updated_at"] = datetime.now()
    this_feed["display_from_date"] = datetime.now()

    if functions.dfIsEmpty(feed):
        return this_feed

    return feed.append(this_feed)
Example #18
    def createDfWithAllDateRanges(self, account_id):
        all_df = None

        settings = Settings()
        for date_range in settings.date_ranges:

            if date_range == "THIS_MONTH" and LocalDates(
                    account_id).is_first_of_month:
                continue

            report = Report(account_id, date_range, self.options)
            report.createAccountDirectory()
            report.createReportDirectory()
            report.downloadReport(account_id, report.where_string)

            df = report.convertCsvToDataframe()

            df["date_range"] = date_range

            if df.shape[0] == 0:
                print("%s df is empty" % (date_range))
                continue

            if functions.dfIsEmpty(all_df):
                all_df = df.copy()
            else:
                all_df = all_df.append(df.copy())

        if functions.dfIsEmpty(all_df):
            Log("info", "%s report is empty" % (self.report_name), "",
                self.account_id)
            return

        all_df = report.stringifyIds(all_df)

        return all_df.reset_index()
def main(account_id):
    settings = Settings()
    Log("info", "processing ad n-grams", "", account_id)

    df = createFullDataFrame(account_id, settings)

    if functions.dfIsEmpty(df):
        return

    df = df.drop_duplicates()

    table_name = "ad_n_gram_performance"

    deleteFromTable(table_name, account_id, settings.createEngine())

    functions.append_df_to_sql_table(df, table_name)
def populateAdGroupsAndAdGroupPerformance(account_id):
    column_map = {
        "campaign_google_id": "campaign_id",
        "adgroup_name": "name",
        "adgroup_google_id": "google_id",
        "adgroup_status": "status"
    }

    # all data = both performance and adgroups
    all_data_df = getAllData(account_id, column_map)
    if functions.dfIsEmpty(all_data_df):
        print("df is empty")
        return

    # Split into adgroups and adgroup_performance and add to the db
    # adgroups
    adgroups_cols = [column_map[i]
                     for i in column_map] + ["created_at", "updated_at", "id"]
    adgroups = all_data_df[adgroups_cols].copy()  # copy to avoid SettingWithCopyWarning
    adgroups["account_id"] = account_id

    deleteAndWriteToAdGroupsTable(adgroups, account_id, "adgroups",
                                  settings.createEngine())

    # adgroup_performance
    adgroup_performance_cols = list(all_data_df.columns)
    for k in column_map:
        adgroup_performance_cols.remove(column_map[k])
    adgroup_performance = all_data_df[adgroup_performance_cols].copy()
    # Note: just add the date range manually for now, but they can be looped through at a later date
    # for date_range in settings.date_ranges:
    date_range = "last_30_days"
    adgroup_performance["date_range"] = date_range
    adgroup_performance["adgroup_id"] = adgroup_performance["id"]
    del adgroup_performance["id"]
    adgroup_performance["account_id"] = account_id

    adgroup_performance["id"] = pd.Series(
        [uuid.uuid1() for i in range(len(adgroup_performance))]).astype(str)

    deleteAndWriteToAdGroupsTable(adgroup_performance,
                                  account_id, "adgroup_performance",
                                  settings.createEngine())
def main(account_id):
    Log("info", "processing adgroup feed", "", account_id)

    table_name = "adgroup_feed"

    deleteFromTable(table_name, account_id)

    df = getFeedInfoAllDateRanges(account_id)

    if functions.dfIsEmpty(df):
        print("df is empty")
        deleteFromTable(table_name, account_id)
        return

    df = functions.createUuid(df)
    df = functions.trimDfToTableColumns(df, table_name)

    df['display_from_date'] = datetime.now()

    functions.append_df_to_sql_table(df, table_name)
def main(account_id):
    Log("info", "getting campaign performance from the api", "", account_id)

    report = Report(account_id, "", options)

    df = report.createDfWithAllDateRanges(account_id)

    if functions.dfIsEmpty(df):
        return

    # remember column headers are as per the download here
    df["campaign_id"] = df.apply(
        lambda row: functions.addIdToDf(account_id, row["Campaign ID"]),
        axis=1)
    df["id"] = df["campaign_id"]

    df = report.basicProcessing(df)

    report.writeToEntitiesTable(df, report, account_id)

    report.writeToPerformanceTable(df, report, account_id)
Example #23
def main(account_id):
    Log("info", "getting account performance from the api", "", account_id)
    report = Report(account_id, "last_30_days", options)

    report.createAccountDirectory()

    report.createReportDirectory()

    report.downloadReport(account_id, options["where_string"])

    df = report.convertCsvToDataframe()

    if functions.dfIsEmpty(df):
        return

    df = report.basicProcessing(df)

    df = reportSpecificProcessing(df, account_id)

    deleteExitingData(account_id, options["performance_table_name"])

    report.writeDataframeToTable(df, options["performance_table_name"])
    def updateBids(self, df):
        if functions.dfIsEmpty(df):
            return
        mutations = df.copy()
        mutations["entity_google_id"] = mutations[
            "adgroup_google_id"] + "," + mutations["google_id"]
        mutations["account_id"] = self.account_id
        mutations["type"] = "keyword"
        mutations["action"] = "set"
        mutations["attribute"] = "bid"
        mutations["value"] = mutations["new_bid"]
        mutations["created_at"] = datetime.now()
        mutations["updated_at"] = datetime.now()
        mutations = mutations[[
            "entity_google_id", "entity_id", "account_id", "type", "action",
            "attribute", "value", "created_at", "updated_at"
        ]]
        mutations = mutations.reset_index(drop=True)
        mutations["id"] = pd.Series(
            [uuid.uuid1() for i in range(len(mutations))]).astype(str)
        print("updating %s bids" % mutations.shape[0])
        # print mutations["entity_id"]
        Database().appendDataframe("mutations", mutations)
def main(account_id):
    Log("info", "getting search query performance from the api", "",
        account_id)

    report = Report(account_id, "", options)

    df = report.createDfWithAllDateRanges(account_id)

    if functions.dfIsEmpty(df):
        return

    df = df.groupby(['Search term', 'date_range'], as_index=False).sum()

    df["search_query_id"] = df.apply(
        lambda row: functions.addIdToDf(account_id, row["Search term"]),
        axis=1)
    df["id"] = df["search_query_id"]

    df = report.basicProcessing(df)

    report.writeToEntitiesTable(df, report, account_id)
    report.writeToPerformanceTable(df, report, account_id)

    return df
def addNGrams(search_query_performance_df, columns):
    if functions.dfIsEmpty(search_query_performance_df):
        return

    n_gram_dict = {}

    for i, row in search_query_performance_df.fillna(0).iterrows():
        impressions = float(row['impressions'])
        clicks = float(row['clicks'])

        conversions = float(row['conversions'])
        conversion_value = float(row['conversion_value'])
        cost = float(row['cost'])
        text = (row['query'])

        # tidy up
        puncts = [",", ".", "!", "?", ":"]
        for punct in puncts:
            text = text.replace(punct, "")
        text = word_tokenize(text)

        bigram = bigrams(text)
        bigram_vec = []
        for gram in bigram:
            bigram_vec.append(gram)
        trigram = trigrams(text)
        trigram_vec = []
        for gram in trigram:
            trigram_vec.append(gram)
        total_gram_vec = bigram_vec + trigram_vec
        for gram in total_gram_vec:
            if gram not in n_gram_dict.keys():
                n_gram_dict[gram] = {
                    'impressions': impressions,
                    # 'avg_pos_mult': impressions * avg_pos,
                    'gram_count': 1,
                    'clicks': clicks,
                    'cost': cost,
                    'conversions': conversions,
                    'conversion_value': conversion_value,
                }
            else:
                n_gram_dict[gram]['impressions'] += impressions
                # n_gram_dict[gram]['avg_pos_mult'] += impressions * avg_pos
                n_gram_dict[gram]['gram_count'] += 1
                n_gram_dict[gram]['clicks'] += clicks
                n_gram_dict[gram]['cost'] += cost
                n_gram_dict[gram]['conversions'] += conversions
                n_gram_dict[gram]['conversion_value'] += conversion_value

    # compute per-gram rate metrics and significance statistics

    n_gram_df_data = {}

    for gram in n_gram_dict.keys():
        impressions = n_gram_dict[gram]['impressions']
        count = n_gram_dict[gram]['gram_count']
        # avg_pos = n_gram_dict[gram]['avg_pos_mult'] / count
        clicks = n_gram_dict[gram]['clicks']
        conversions = n_gram_dict[gram]['conversions']
        cost = n_gram_dict[gram]['cost']
        conversion_value = n_gram_dict[gram]['conversion_value']

        try:
            cpa = cost / conversions
        except ZeroDivisionError:
            cpa = 0

        try:
            roas = conversion_value / cost
        except ZeroDivisionError:
            roas = 0

        try:
            ctr = clicks / impressions
        except ZeroDivisionError:
            ctr = 0
        try:
            conversion_rate = conversions / clicks
        except ZeroDivisionError:
            conversion_rate = 0
        try:
            average_cpc = cost / clicks
        except ZeroDivisionError:
            average_cpc = 0
        if clicks != 0 and clicks != 1:
            std = np.sqrt(clicks * (1 - ctr) ** 2 +
                          (impressions - clicks) * ctr ** 2) / (impressions - 1)
            standard_error = std / np.sqrt(impressions)
        else:
            standard_error = 0
        min_result = ctr - standard_error * 2
        max_result = ctr + standard_error * 2
        n_gram_df_data[gram] = {
            'n_gram_count': count,
            'impressions': impressions,
            'ctr': ctr,
            'conversion_rate': conversion_rate,
            'average_cpc': average_cpc,
            'ctr_significance': standard_error,
            'conversions': conversions,
            'cost': cost,
            'conversion_value': conversion_value,
            'cpa': cpa,
            'roas': roas,
            'clicks': clicks,
        }

    df = pd.DataFrame(n_gram_df_data)
    df = df.T

    df["ctr_significance"] = df["ctr_significance"].replace(
        r'^\s*$', 0, regex=True).astype("float")

    return df
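
For comparison, the textbook standard error of a proportion is sqrt(p * (1 - p) / n). The hand-rolled estimate above divides the summed squared deviations by (impressions - 1) rather than by its square root, so it comes out smaller than the usual Bernoulli sample estimate; an illustrative check:

import numpy as np

def binomial_se(clicks, impressions):
    # Textbook standard error of a proportion: sqrt(p * (1 - p) / n)
    p = clicks / impressions
    return np.sqrt(p * (1 - p) / impressions)

print(binomial_se(50, 1000))  # ~0.0069, so a 5% CTR carries roughly +/-1.4% at 2 SE
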
def main(account_id):
    settings = Settings()

    deleteFromSearchQueryNGrams(settings, account_id)

    columns = [
        "id", "created_at", "updated_at", "account_id", "date_range", "n_gram",
        "n_gram_count", "show_on_graph", "ctr", "ctr_significance",
        "impressions", "clicks", "conversions", "cost", "average_cpc",
        "conversion_value", "cpa", "roas", "conversion_rate", "graph_order"
    ]

    settings = Settings()

    all_df = None
    for date_range in settings.date_ranges:

        df = dataFrameFromSearchQueryPerformanceReports(
            settings, account_id, date_range)
        df = addNGrams(df, columns)

        if functions.dfIsEmpty(df):
            continue

        df = filterAndAddColumns(df, date_range, account_id)

        # print date_range
        # print df.shape

        if functions.dfIsEmpty(df):
            continue
        # drop duplicate columns
        # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns
        df = df.loc[:, ~df.columns.duplicated()]

        if functions.dfIsEmpty(df):
            continue

        if functions.dfIsEmpty(all_df):
            all_df = df.copy()
        else:
            all_df = all_df.append(df.copy())

    if functions.dfIsEmpty(all_df):
        return

    # print search_query_performance_df.head()

    # serving too many rows will cause php memory issues (and issues with the client)
    # just serve the first X rows, ordered by cost
    rowsToServe = 2000
    all_df = all_df.sort_values("cost", ascending=False).head(rowsToServe)

    # then sort by ctr and add the graph elements (the n-grams must be among the top 2000 spenders to appear in the graph)
    all_df = all_df.sort_values(["ctr", "conversions", "clicks", "cost"],
                                ascending=False).reset_index(drop=True)
    all_df.loc[all_df.index == 0, 'show_on_graph'] = True
    all_df.loc[all_df.index == 1, 'show_on_graph'] = True
    all_df.loc[all_df.index == 2, 'show_on_graph'] = True
    all_df.loc[all_df.index == 0, 'graph_order'] = 1
    all_df.loc[all_df.index == 1, 'graph_order'] = 2
    all_df.loc[all_df.index == 2, 'graph_order'] = 3

    # finally fill NAs and append to the table
    all_df = all_df[columns].fillna(0)

    functions.append_df_to_sql_table(all_df, "search_query_n_gram_performance")
def main(account_id):
    Log("info", "processing account performance changes", "", account_id)

    ad_performance_df = dataFrameFromAdPerformanceReports(settings, account_id)

    if functions.dfIsEmpty(ad_performance_df):
        return

    # returns a df with the past x (60 at present) days data
    df = createXDaysDataFrame(ad_performance_df, settings, account_id)

    # add the graph data to a dict
    # first add the calculated metrics (ctr, etc.) - these will make up the graph data
    df = addCalculatedMetrics(df, settings)
    df = df.replace([np.inf, -np.inf], 0)

    previousDf = df.iloc[:30].sort_index().fillna(0).round(2)
    currentDf = df.iloc[30:].sort_index().fillna(0).round(2)

    # we'll store everything in this dict
    mets = getGraphDataFromDf(previousDf, currentDf, settings)

    # add the comparisons - the baseline amount and the -/+ amount
    # compare previousDf to currentDf to get the +/- numbers
    # was, now, diff

    # sum the dfs, combining the dates
    previousDf = previousDf.sum()
    currentDf = currentDf.sum()
    previousDf = addCalculatedMetrics(previousDf, settings).fillna(0)
    currentDf = addCalculatedMetrics(currentDf, settings).fillna(0)

    del previousDf["account_id"]
    del currentDf["account_id"]

    # add the comparisons (i.e. previous vs current performance)...
    comp = pd.concat([currentDf, previousDf], axis=1)
    comp["vs"] = comp.apply(lambda row: returnComparison(row), axis=1)
    comp = comp.T.reset_index()
    comp = comp[comp["index"] == "vs"].reset_index(drop=True)

    # bring it all together!
    metrics = ["cpa", "ctr", "roas", "conversion_rate"]
    valueNames = list(settings.metrics)
    account_performance_changes = df.groupby("account_id").sum().reset_index()[["account_id"] + metrics + valueNames]
    # add the vs metrics
    for metric in metrics:
        account_performance_changes[metric] = comp[metric].values[0]
    # add the graph data
    for graph in mets:
        account_performance_changes[graph] = str(mets[graph])

    # add the baseline (overall performance)
    for metric in settings.calculatedMetrics:
        if metric == "average_cpc":
            continue
        baseline_metric = metric + "_baseline"
        if settings.calculatedMetrics[metric][1] == "/":
            account_performance_changes[baseline_metric] = account_performance_changes[
                                                               settings.calculatedMetrics[metric][0]] / \
                                                           account_performance_changes[
                                                               settings.calculatedMetrics[metric][2]]
        if settings.calculatedMetrics[metric][3]:
            account_performance_changes[baseline_metric] = account_performance_changes[baseline_metric] * 100

    account_performance_changes = account_performance_changes.fillna(0).round(2)
    baseline_metrics = [i + "_baseline" for i in metrics]
    graph_metrics = [i + "_graph_data" for i in metrics]
    account_performance_changes = account_performance_changes[
        ["account_id"] + metrics + baseline_metrics + graph_metrics]
    account_performance_changes["date_range"] = "last_30_days"
    account_performance_changes["id"] = pd.Series(
        [uuid.uuid1() for i in range(len(account_performance_changes))]).astype(str)
    account_performance_changes["created_at"] = datetime.now()
    account_performance_changes["updated_at"] = datetime.now()

    for metric in graph_metrics:
        account_performance_changes[metric] = account_performance_changes[metric].str.replace("'", '"')

    account_performance_changes = account_performance_changes.replace([np.inf, -np.inf], 0)

    account_performance_changes.to_sql(
        "account_performance_changes", settings.createEngine(), if_exists='append', index=False
    )

    return account_performance_changes
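
The baseline loop above reads each settings.calculatedMetrics entry as [numerator, operator, denominator, percentage_flag]. The real table lives in Settings; its implied shape, with illustrative values inferred from how ctr, conversion_rate and roas are multiplied by 100 elsewhere in this listing, would be:

# Inferred shape only; the actual definition lives in Settings.
calculatedMetrics = {
    "ctr":             ["clicks",           "/", "impressions", True],
    "conversion_rate": ["conversions",      "/", "clicks",      True],
    "cpa":             ["cost",             "/", "conversions", False],
    "roas":            ["conversion_value", "/", "cost",        True],
    "average_cpc":     ["cost",             "/", "clicks",      False],
}
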
    def process(self):
        """A place to group the core actions. Delete existing data, process and replace"""

        self.deleteExisting()
        df_chunks = self.getAdvertsDataframe()
        ldf = None
        while True:

            try:
                df = next(df_chunks)
            except StopIteration:
                return

            for line in self.lines:
                this_df = df[[
                    "clicks", "impressions", self.group_by_column, line
                ]].groupby([self.group_by_column, line]).sum()
                this_df["ctr"] = (this_df.clicks / this_df.impressions) * 100
                adgroup_ids = list(this_df.index.levels[0])
                this_df = this_df.reset_index()

                if this_df.shape[0] == 0:
                    continue

                # start campaign ids loop
                for i, adgroup_id in enumerate(adgroup_ids):
                    # if i!=5:continue
                    tdf = this_df.copy()
                    quantile = tdf[(tdf[self.group_by_column] == adgroup_id
                                    )].impressions.quantile()
                    tdf = tdf[(tdf[self.group_by_column] == adgroup_id)
                              & (tdf.impressions > quantile) &
                              (tdf[line] != "/")].sort_values(
                                  "ctr", ascending=False).head(3).reset_index(
                                      drop=True)

                    if tdf.shape[0] == 0:
                        continue

                    tdf["order"] = tdf.index + 1
                    tdf["type"] = line
                    tdf.rename(columns={line: "value"}, inplace=True)
                    tdf = tdf[[self.group_by_column, "value", "order", "type"]]

                    try:
                        ldf = ldf.append(tdf)
                    except AttributeError:
                        # first chunk/line: ldf is still None
                        ldf = tdf.copy()

            if functions.dfIsEmpty(ldf):
                return

            ldf["account_id"] = self.account_id
            ldf = ldf.reset_index(drop=True)
            ldf.value = ldf.value.str.replace(" --", "")
            ldf["id"] = pd.Series([uuid.uuid1()
                                   for i in range(len(ldf))]).astype(str)
            ldf["created_at"] = datetime.now()
            ldf["updated_at"] = datetime.now()
            ldf["date_range"] = self.date_range
            ldf.to_sql(self.table_name,
                       Database().createEngine(),
                       if_exists='append',
                       index=False)
            ldf = None  # reset so the next chunk does not re-append these rows