def writeToAdvertFeed(df, table_name, engine): columns = list( pd.read_sql("select * from %s where id = '999999999'" % (table_name), settings.createEngine()).columns) for col in columns: if col not in df.columns: df[col] = None functions.append_df_to_sql_table(df[columns], table_name)
def writeToAdvertPerformanceTable(self, df, date_range): table_name = "advert_performance" columns = list( pd.read_sql( "select * from %s where clicks > 999999999" % (table_name), Database().createEngine()).columns) for col in columns: if col not in df.columns: df[col] = None functions.append_df_to_sql_table(df[columns], table_name)
def main(account_id): settings = Settings() Log("info", "processing ad n-grams", "", account_id) df = createFullDataFrame(account_id, settings) if functions.dfIsEmpty(df): return df = df.drop_duplicates() table_name = "ad_n_gram_performance" deleteFromTable(table_name, account_id, settings.createEngine()) functions.append_df_to_sql_table(df, table_name)
def main(account_id): Log("info", "processing keyword feed", "", account_id) target_cpa = functions.getTargetCpa(account_id) table_name = "keyword_feed" deleteAccountDataFromTable(account_id, table_name, settings) feed = None for date_range in settings.date_ranges: df = getKeywords(account_id, date_range) df = df.loc[:, ~df.columns.duplicated()] # print date_range # print df.shape # to check for duplicates df["keyword_and_type"] = df["keyword_text"] + df["keyword_match_type"] if functions.dfIsEmpty(df): continue if not functions.dfIsEmpty(feed): feed["keyword_and_type"] = feed["keyword_text"] + feed[ "keyword_match_type"] df = df[~df["keyword_and_type"].isin(feed["keyword_and_type"]. values)] feed = getWorstAndBestPerformers(df, feed, target_cpa) if functions.dfIsEmpty(feed): continue # drop duplicate columns # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns feed = feed.loc[:, ~feed.columns.duplicated()] if functions.dfIsEmpty(feed): return if "keyword_and_type" in feed.columns: del feed["keyword_and_type"] feed['display_from_date'] = datetime.now() functions.append_df_to_sql_table(feed, table_name)
def main(account_id): Log("info", "processing search query n-grams feed", "", account_id) target_cpa = functions.getTargetCpa(account_id) engine = settings.createEngine() deleteAllFromTable("search_query_n_gram_feed", account_id, engine) feed = None for date_range in settings.date_ranges: print(date_range) df = getQueryPerformance(account_id, date_range) df = df.loc[:, ~df.columns.duplicated()] print(date_range) print(df.shape) if functions.dfIsEmpty(df): continue if not functions.dfIsEmpty(feed): df = df[~df["n_gram"].isin(feed["n_gram"].values)] feed = getWorstAndBestPerformers(df, feed, target_cpa) if feed is None: continue # drop duplicate columns # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns feed = feed.loc[:, ~feed.columns.duplicated()] if functions.dfIsEmpty(feed): return table_name = "search_query_n_gram_feed" feed = functions.createUuid(feed) feed = functions.trimDfToTableColumns(feed, table_name) feed['display_from_date'] = datetime.now() functions.append_df_to_sql_table(feed, table_name)
def main(account_id): Log("info", "processing adgroup feed", "", account_id) table_name = "adgroup_feed" deleteFromTable(table_name, account_id) df = getFeedInfoAllDateRanges(account_id) if functions.dfIsEmpty(df): print("df is empty") deleteFromTable(table_name, account_id) return df = functions.createUuid(df) df = functions.trimDfToTableColumns(df, table_name) df['display_from_date'] = datetime.now() functions.append_df_to_sql_table(df, table_name)
def deleteAndWriteToAdGroupsTable(df, account_id, table_name, engine): functions.deleteThisAccountsDataFromTable(table_name, account_id, engine) functions.append_df_to_sql_table(df, table_name)
def writeDataframeToTable(self, df, database_table_name): df = functions.trimDfToTableColumns(df, database_table_name) functions.append_df_to_sql_table(df, database_table_name)
def main(account_id): settings = Settings() deleteFromSearchQueryNGrams(settings, account_id) columns = [ "id", "created_at", "updated_at", "account_id", "date_range", "n_gram", "n_gram_count", "show_on_graph", "ctr", "ctr_significance", "impressions", "clicks", "conversions", "cost", "average_cpc", "conversion_value", "cpa", "roas", "conversion_rate", "graph_order" ] settings = Settings() all_df = None for date_range in settings.date_ranges: df = dataFrameFromSearchQueryPerformanceReports( settings, account_id, date_range) df = addNGrams(df, columns) if functions.dfIsEmpty(df): continue df = filterAndAddColumns(df, date_range, account_id) # print date_range # print df.shape if functions.dfIsEmpty(df): continue # drop duplicate columns # from here: https://stackoverflow.com/questions/14984119/python-pandas-remove-duplicate-columns df = df.loc[:, ~df.columns.duplicated()] if functions.dfIsEmpty(df): continue if functions.dfIsEmpty(all_df): all_df = df.copy() else: all_df = all_df.append(df.copy()) if functions.dfIsEmpty(all_df): return # print search_query_performance_df.head() # serving too many rows will cause php memory issues (and issues with the client) # just serve the first X rows, ordered by cost rowsToServe = 2000 all_df = all_df.sort_values("cost", ascending=False).head(rowsToServe) # then sort by ctr and search_queryd the graph elements (means the n-grams must be in the top 1000 spenders to be in the graph) all_df = all_df.sort_values(["ctr", "conversions", "clicks", "cost"], ascending=False).reset_index(drop=True) all_df.loc[all_df.index == 0, 'show_on_graph'] = True all_df.loc[all_df.index == 1, 'show_on_graph'] = True all_df.loc[all_df.index == 2, 'show_on_graph'] = True all_df.loc[all_df.index == 0, 'graph_order'] = 1 all_df.loc[all_df.index == 1, 'graph_order'] = 2 all_df.loc[all_df.index == 2, 'graph_order'] = 3 # finally fill NAs and append to the table all_df = all_df[columns].fillna(0) functions.append_df_to_sql_table(all_df, "search_query_n_gram_performance")